diff --git a/src/coreclr/src/nativeaot/Bootstrap/CppCodeGen.h b/src/coreclr/src/nativeaot/Bootstrap/CppCodeGen.h new file mode 100644 index 0000000000000..91709981afd8e --- /dev/null +++ b/src/coreclr/src/nativeaot/Bootstrap/CppCodeGen.h @@ -0,0 +1,71 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// CppCodeGen.h : Facilities for the C++ code generation backend + +#ifndef __CPP_CODE_GEN_H +#define __CPP_CODE_GEN_H + +#define _CRT_SECURE_NO_WARNINGS + +#ifdef _MSC_VER +// Warnings disabled for generated cpp code +#pragma warning(disable:4200) // zero-sized array +#pragma warning(disable:4101) // unreferenced local variable +#pragma warning(disable:4102) // unreferenced label +#pragma warning(disable:4244) // possible loss of data +#pragma warning(disable:4717) // recursive on all control paths +#pragma warning(disable:4307) // integral constant overflow +#endif + +#ifdef _MSC_VER +#define INT64VAL(x) (x##i64) +#else +#define INT64VAL(x) (x##LL) +#endif + +#ifdef _MSC_VER +#define CORERT_UNREACHABLE __assume(0) +#else +#define CORERT_UNREACHABLE __builtin_unreachable() +#endif + +#ifdef _MSC_VER +#define CORERT_THREAD __declspec(thread) +#else +#define CORERT_THREAD __thread +#endif + +// Use the bit representation of uint64_t `v` as the bit representation of a double. +inline double __uint64_to_double(uint64_t v) +{ + union + { + uint64_t u64; + double d; + } val; + val.u64 = v; + return val.d; +} + +struct ReversePInvokeFrame +{ + void* m_savedPInvokeTransitionFrame; + void* m_savedThread; +}; + +struct PInvokeTransitionFrame +{ + void* m_RIP; + void* m_pThread; // unused by stack crawler, this is so GetThread is only called once per method + // can be an invalid pointer in universal transition cases (which never need to call GetThread) + uint32_t m_Flags; // PInvokeTransitionFrameFlags +}; + +// Should be synchronized with System.Private.CoreLib/src/System/Runtime/CompilerServices/StaticClassConstructionContext.cs +struct StaticClassConstructionContext +{ + void* m_cctorMethodAddress; + uint32_t m_initialized; +}; +#endif diff --git a/src/coreclr/src/nativeaot/Bootstrap/common.cpp b/src/coreclr/src/nativeaot/Bootstrap/common.cpp new file mode 100644 index 0000000000000..9efc4b2ad89b6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Bootstrap/common.cpp @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// common.cpp : source file that includes just the standard includes +// testNative.pch will be the pre-compiled header +// common.obj will contain the pre-compiled type information + +#include "common.h" + +// TODO: reference any additional headers you need in common.H +// and not in this file diff --git a/src/coreclr/src/nativeaot/Bootstrap/common.h b/src/coreclr/src/nativeaot/Bootstrap/common.h new file mode 100644 index 0000000000000..697bfb544d404 --- /dev/null +++ b/src/coreclr/src/nativeaot/Bootstrap/common.h @@ -0,0 +1,106 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
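// (Editor sketch, not part of this change.) The __uint64_to_double helper in
// CppCodeGen.h above reinterprets a raw bit pattern through a union, so no numeric
// conversion takes place. For example, 0x3FF0000000000000 is the IEEE-754 encoding
// of 1.0 (sign 0, biased exponent 0x3FF, zero mantissa):
double one = __uint64_to_double(0x3FF0000000000000ULL);   // yields exactly 1.0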
+ +// common.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// + +#ifndef __COMMON_H +#define __COMMON_H + +#define _CRT_SECURE_NO_WARNINGS + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifndef _WIN32 +#include +#endif + +using namespace std; + +class MethodTable; +class Object; + +#ifdef _MSC_VER +#define __NORETURN __declspec(noreturn) +#else +#define __NORETURN __attribute((noreturn)) +#endif + +int __initialize_runtime(); +void __shutdown_runtime(); + +extern "C" Object * __allocate_object(MethodTable * pMT); +extern "C" Object * __allocate_array(size_t elements, MethodTable * pMT); +extern "C" Object * __castclass(MethodTable * pMT, void * obj); +extern "C" Object * __isinst(MethodTable * pMT, void * obj); +extern "C" __NORETURN void __throw_exception(void * pEx); +extern "C" void __debug_break(); + +Object * __load_string_literal(const char * string); + +extern "C" void __range_check_fail(); + +inline void __range_check(void * a, size_t elem) +{ + if (elem >= *((size_t*)a + 1)) + __range_check_fail(); +} + +Object * __get_commandline_args(int argc, char * argv[]); + +// POD version of EEType to use for static initialization +struct RawEEType +{ + uint16_t m_componentSize; + uint16_t m_flags; + uint32_t m_baseSize; + MethodTable * m_pBaseType; + uint16_t m_usNumVtableSlots; + uint16_t m_usNumInterfaces; + uint32_t m_uHashCode; +}; + +struct ReversePInvokeFrame; + +void __reverse_pinvoke(ReversePInvokeFrame* pRevFrame); +void __reverse_pinvoke_return(ReversePInvokeFrame* pRevFrame); + +struct PInvokeTransitionFrame; + +void __pinvoke(PInvokeTransitionFrame* pFrame); +void __pinvoke_return(PInvokeTransitionFrame* pFrame); + +typedef size_t UIntNative; + +inline bool IS_ALIGNED(UIntNative val, UIntNative alignment) +{ + //ASSERT(0 == (alignment & (alignment - 1))); + return 0 == (val & (alignment - 1)); +} + +template +inline bool IS_ALIGNED(T* val, UIntNative alignment) +{ + //ASSERT(0 == (alignment & (alignment - 1))); + return IS_ALIGNED(reinterpret_cast(val), alignment); +} + +#define RAW_MIN_OBJECT_SIZE (3*sizeof(void*)) + +#define AlignBaseSize(s) ((s < RAW_MIN_OBJECT_SIZE) ? RAW_MIN_OBJECT_SIZE : ((s + (sizeof(void*)-1) & ~(sizeof(void*)-1)))) + +#define ARRAY_BASE (2*sizeof(void*)) + +#endif // __COMMON_H diff --git a/src/coreclr/src/nativeaot/Bootstrap/main.cpp b/src/coreclr/src/nativeaot/Bootstrap/main.cpp new file mode 100644 index 0000000000000..00dac83b5775a --- /dev/null +++ b/src/coreclr/src/nativeaot/Bootstrap/main.cpp @@ -0,0 +1,464 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +#include "sal.h" +#include "gcenv.structs.h" +#include "gcenv.base.h" + +#include + +#ifndef CPPCODEGEN + +// +// This is the mechanism whereby multiple linked modules contribute their global data for initialization at +// startup of the application. +// +// ILC creates sections in the output obj file to mark the beginning and end of merged global data. +// It defines sentinel symbols that are used to get the addresses of the start and end of global data +// at runtime. The section names are platform-specific to match platform-specific linker conventions. 
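// (Editor sketch, not part of this change.) The sentinel symbols simply bracket the
// merged section, so the per-module pointers can be walked as an ordinary half-open
// range. InitializeModules, called later in this file, receives exactly this range;
// the hypothetical helper below only illustrates the layout, and skipping the null
// MSVC book-end placeholders is an assumption about the consumer.
static int CountModuleHeaders()
{
    int count = 0;
    // Everything between the book-ends is a pointer to a module's ReadyToRun header.
    for (void ** ppCurrent = __modules_a; ppCurrent != __modules_z; ppCurrent++)
    {
        if (*ppCurrent != nullptr)  // skip the null book-end entries emitted for MSVC
            count++;
    }
    return count;
}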
+// +#if defined(_MSC_VER) + +#pragma section(".modules$A", read) +#pragma section(".modules$Z", read) +extern "C" __declspec(allocate(".modules$A")) void * __modules_a[]; +extern "C" __declspec(allocate(".modules$Z")) void * __modules_z[]; + +__declspec(allocate(".modules$A")) void * __modules_a[] = { nullptr }; +__declspec(allocate(".modules$Z")) void * __modules_z[] = { nullptr }; + +// +// Each obj file compiled from managed code has a .modules$I section containing a pointer to its ReadyToRun +// data (which points at eager class constructors, frozen strings, etc). +// +// The #pragma ... /merge directive folds the book-end sections and all .modules$I sections from all input +// obj files into .rdata in alphabetical order. +// +#pragma comment(linker, "/merge:.modules=.rdata") + +// +// Unboxing stubs need to be merged, folded and sorted. They are delimited by two special sections (.unbox$A +// and .unbox$Z). All unboxing stubs are in .unbox$M sections. +// +#pragma comment(linker, "/merge:.unbox=.text") + +char _bookend_a; +char _bookend_z; + +// +// Generate bookends for the managed code section. +// We give them unique bodies to prevent folding. +// + +#pragma code_seg(".managedcode$A") +void* __managedcode_a() { return &_bookend_a; } +#pragma code_seg(".managedcode$Z") +void* __managedcode_z() { return &_bookend_z; } +#pragma code_seg() + +// +// Generate bookends for the unboxing stub section. +// We give them unique bodies to prevent folding. +// + +#pragma code_seg(".unbox$A") +void* __unbox_a() { return &_bookend_a; } +#pragma code_seg(".unbox$Z") +void* __unbox_z() { return &_bookend_z; } +#pragma code_seg() + +#else // _MSC_VER + +#if defined(__APPLE__) + +extern void * __modules_a[] __asm("section$start$__DATA$__modules"); +extern void * __modules_z[] __asm("section$end$__DATA$__modules"); +extern char __managedcode_a __asm("section$start$__TEXT$__managedcode"); +extern char __managedcode_z __asm("section$end$__TEXT$__managedcode"); +extern char __unbox_a __asm("section$start$__TEXT$__unbox"); +extern char __unbox_z __asm("section$end$__TEXT$__unbox"); + +#else // __APPLE__ + +extern "C" void * __start___modules[]; +extern "C" void * __stop___modules[]; +static void * (&__modules_a)[] = __start___modules; +static void * (&__modules_z)[] = __stop___modules; + +extern "C" char __start___managedcode; +extern "C" char __stop___managedcode; +static char& __managedcode_a = __start___managedcode; +static char& __managedcode_z = __stop___managedcode; + +extern "C" char __start___unbox; +extern "C" char __stop___unbox; +static char& __unbox_a = __start___unbox; +static char& __unbox_z = __stop___unbox; + +#endif // __APPLE__ + +#endif // _MSC_VER + +#endif // !CPPCODEGEN + +// Do not warn that extern C methods throw exceptions. This is temporary +// as long as we have unimplemented/throwing APIs in this file. 
+#pragma warning(disable:4297) + +#ifdef CPPCODEGEN + +extern "C" Object * RhNewObject(MethodTable * pMT); +extern "C" Object * RhNewArray(MethodTable * pMT, int32_t elements); +extern "C" void * RhTypeCast_IsInstanceOf(MethodTable * pMT, void* pObject); +extern "C" void * RhTypeCast_CheckCast(MethodTable * pMT, void* pObject); +extern "C" void RhpStelemRef(void * pArray, int index, void * pObj); +extern "C" void * RhpLdelemaRef(void * pArray, int index, MethodTable * pMT); +extern "C" __NORETURN void RhpThrowEx(void * pEx); +extern "C" void RhDebugBreak(); + +extern "C" Object * __allocate_object(MethodTable * pMT) +{ + return RhNewObject(pMT); +} + +extern "C" Object * __allocate_array(size_t elements, MethodTable * pMT) +{ + return RhNewArray(pMT, (int32_t)elements); // TODO: type mismatch +} + +extern "C" Object * __castclass(MethodTable * pTargetMT, void* obj) +{ + return (Object *)RhTypeCast_CheckCast(pTargetMT, obj); +} + +extern "C" Object * __isinst(MethodTable * pTargetMT, void* obj) +{ + return (Object *)RhTypeCast_IsInstanceOf(pTargetMT, obj); +} + +extern "C" void __stelem_ref(void * pArray, unsigned idx, void * obj) +{ + RhpStelemRef(pArray, idx, obj); +} + +extern "C" void* __ldelema_ref(void * pArray, unsigned idx, MethodTable * type) +{ + return RhpLdelemaRef(pArray, idx, type); +} + +extern "C" void __throw_exception(void * pEx) +{ + RhpThrowEx(pEx); +} + +extern "C" void __debug_break() +{ + RhDebugBreak(); +} + +void __range_check_fail() +{ + throw "ThrowRangeOverflowException"; +} + +extern "C" void RhpReversePInvoke2(ReversePInvokeFrame* pRevFrame); +extern "C" void RhpReversePInvokeReturn2(ReversePInvokeFrame* pRevFrame); + +void __reverse_pinvoke(ReversePInvokeFrame* pRevFrame) +{ + RhpReversePInvoke2(pRevFrame); +} + +void __reverse_pinvoke_return(ReversePInvokeFrame* pRevFrame) +{ + RhpReversePInvokeReturn2(pRevFrame); +} + +extern "C" void RhpPInvoke2(PInvokeTransitionFrame* pFrame); +extern "C" void RhpPInvokeReturn2(PInvokeTransitionFrame* pFrame); + +void __pinvoke(PInvokeTransitionFrame* pFrame) +{ + RhpPInvoke2(pFrame); +} + +void __pinvoke_return(PInvokeTransitionFrame* pFrame) +{ + RhpPInvokeReturn2(pFrame); +} + +namespace System_Private_CoreLib { namespace System { + + class Object { + public: + MethodTable * get_EEType() { return *(MethodTable **)this; } + }; + + class Array : public Object { + public: + int32_t GetArrayLength() { + return *(int32_t *)((void **)this + 1); + } + void * GetArrayData() { + return (void **)this + 2; + } + }; + + class String : public Object { public: + static MethodTable * __getMethodTable(); + }; + + class String__Array : public Object { public: + static MethodTable * __getMethodTable(); + }; + + class EETypePtr { public: + intptr_t m_value; + }; + +}; }; + +Object * __load_string_literal(const char * string) +{ + // TODO: Cache/intern string literals + // TODO: Unicode string literals + + size_t len = strlen(string); + + Object * pString = RhNewArray(System_Private_CoreLib::System::String::__getMethodTable(), (int32_t)len); + + uint16_t * p = (uint16_t *)((char*)pString + sizeof(intptr_t) + sizeof(int32_t)); + for (size_t i = 0; i < len; i++) + p[i] = string[i]; + return pString; +} + +#if defined(HOST_WASM) +// Exception wrapper type that allows us to differentiate managed and native exceptions +class ManagedExceptionWrapper : exception +{ +public: + ManagedExceptionWrapper(void* pManagedException) + { + m_pManagedException = pManagedException; + } + +public: + void* m_pManagedException; +}; +#endif + +extern "C" void 
RhpThrowEx(void * pEx) +{ +#if defined(HOST_WASM) + throw ManagedExceptionWrapper(pEx); +#else + throw "RhpThrowEx"; +#endif +} + +extern "C" void RhpThrowHwEx() +{ + throw "RhpThrowHwEx"; +} + +#if defined(HOST_WASM) +// returns the Leave target +extern "C" uint32_t LlvmCatchFunclet(void* pHandlerIP, void* pvRegDisplay); +extern "C" uint32_t RhpCallCatchFunclet(void * exceptionObj, void* pHandlerIP, void* pvRegDisplay, void *exInfo) +{ + return LlvmCatchFunclet(pHandlerIP, pvRegDisplay); +} + +extern "C" uint32_t LlvmFilterFunclet(void* pHandlerIP, void* pvRegDisplay); +extern "C" uint32_t RhpCallFilterFunclet(void* exceptionObj, void * pHandlerIP, void* shadowStack) +{ + return LlvmFilterFunclet(pHandlerIP, shadowStack); +} +#else +extern "C" uint32_t RhpCallCatchFunclet(void *, void*, void*, void*) +{ + throw "RhpCallCatchFunclet"; +} +extern "C" void* RhpCallFilterFunclet(void*, void*, void*) +{ + throw "RhpCallFilterFunclet"; +} +#endif + +#if defined(HOST_WASM) +extern "C" void LlvmFinallyFunclet(void *finallyHandler, void *shadowStack); +extern "C" void RhpCallFinallyFunclet(void *finallyHandler, void *shadowStack) +{ + LlvmFinallyFunclet(finallyHandler, shadowStack); +} +#else +extern "C" void RhpCallFinallyFunclet(void *, void*) +{ + throw "RhpCallFinallyFunclet"; +} +#endif + +extern "C" void RhpUniversalTransition() +{ + throw "RhpUniversalTransition"; +} +extern "C" void RhpUniversalTransition_DebugStepTailCall() +{ + throw "RhpUniversalTransition_DebugStepTailCall"; +} +extern "C" void ConstrainedCallSupport_GetStubs(void*, void*) +{ + throw "ConstrainedCallSupport_GetStubs"; +} + +extern "C" void* RtRHeaderWrapper(); +#endif // CPPCODEGEN + +// This works around System.Private.Interop's references to Interop.Native. +// This won't be needed once we stop dragging in S.P.Interop for basic p/invoke support. 
+extern "C" void CCWAddRef() +{ + throw "CCWAddRef"; +} + +extern "C" void __fail_fast() +{ + // TODO: FailFast + printf("Call to an unimplemented runtime method; execution cannot continue.\n"); + printf("Method: __fail_fast\n"); + exit(-1); +} + +extern "C" bool RhInitialize(); +extern "C" void RhpEnableConservativeStackReporting(); +extern "C" void RhpShutdown(); +extern "C" void RhSetRuntimeInitializationCallback(int (*fPtr)()); + +#ifndef CPPCODEGEN + +extern "C" bool RhRegisterOSModule(void * pModule, + void * pvManagedCodeStartRange, uint32_t cbManagedCodeRange, + void * pvUnboxingStubsStartRange, uint32_t cbUnboxingStubsRange, + void ** pClasslibFunctions, uint32_t nClasslibFunctions); + +extern "C" void* PalGetModuleHandleFromPointer(void* pointer); + +#endif // !CPPCODEGEN + +extern "C" void GetRuntimeException(); +extern "C" void FailFast(); +extern "C" void AppendExceptionStackFrame(); +extern "C" void GetSystemArrayEEType(); +extern "C" void OnFirstChanceException(); + +typedef void(*pfn)(); + +static const pfn c_classlibFunctions[] = { + &GetRuntimeException, + &FailFast, + nullptr, // &UnhandledExceptionHandler, + &AppendExceptionStackFrame, + nullptr, // &CheckStaticClassConstruction, + &GetSystemArrayEEType, + &OnFirstChanceException, + nullptr, // &DebugFuncEvalHelper, + nullptr, // &DebugFuncEvalAbortHelper, +}; + +extern "C" void InitializeModules(void* osModule, void ** modules, int count, void ** pClasslibFunctions, int nClasslibFunctions); + +#ifndef CORERT_DLL +#define CORERT_ENTRYPOINT __managed__Main +#if defined(_WIN32) +extern "C" int __managed__Main(int argc, wchar_t* argv[]); +#else +extern "C" int __managed__Main(int argc, char* argv[]); +#endif +#else +#define CORERT_ENTRYPOINT __managed__Startup +extern "C" void __managed__Startup(); +#endif // !CORERT_DLL + +static int InitializeRuntime() +{ + if (!RhInitialize()) + return -1; + +#if defined(CPPCODEGEN) || defined(HOST_WASM) + RhpEnableConservativeStackReporting(); +#endif // CPPCODEGEN + +#ifndef CPPCODEGEN + void * osModule = PalGetModuleHandleFromPointer((void*)&CORERT_ENTRYPOINT); + + // TODO: pass struct with parameters instead of the large signature of RhRegisterOSModule + if (!RhRegisterOSModule( + osModule, + (void*)&__managedcode_a, (uint32_t)((char *)&__managedcode_z - (char*)&__managedcode_a), + (void*)&__unbox_a, (uint32_t)((char *)&__unbox_z - (char*)&__unbox_a), + (void **)&c_classlibFunctions, _countof(c_classlibFunctions))) + { + return -1; + } +#endif // !CPPCODEGEN + +#ifndef CPPCODEGEN + InitializeModules(osModule, __modules_a, (int)((__modules_z - __modules_a)), (void **)&c_classlibFunctions, _countof(c_classlibFunctions)); +#elif defined HOST_WASM + InitializeModules(nullptr, (void**)RtRHeaderWrapper(), 1, (void **)&c_classlibFunctions, _countof(c_classlibFunctions)); +#else // !CPPCODEGEN + InitializeModules(nullptr, (void**)RtRHeaderWrapper(), 2, (void **)&c_classlibFunctions, _countof(c_classlibFunctions)); +#endif // !CPPCODEGEN + +#ifdef CORERT_DLL + // Run startup method immediately for a native library + __managed__Startup(); +#endif // CORERT_DLL + + return 0; +} + +#ifndef CORERT_DLL +#if defined(_WIN32) +int __cdecl wmain(int argc, wchar_t* argv[]) +#else +int main(int argc, char* argv[]) +#endif +{ + int initval = InitializeRuntime(); + if (initval != 0) + return initval; + + int retval; +#ifdef CPPCODEGEN + try +#endif + { + retval = __managed__Main(argc, argv); + } +#ifdef CPPCODEGEN + catch (const char* &e) + { + printf("Call to an unimplemented runtime method; execution 
cannot continue.\n"); + printf("Method: %s\n", e); + retval = -1; + } +#endif + RhpShutdown(); + + return retval; +} +#endif // !CORERT_DLL + +#ifdef CORERT_DLL +static struct InitializeRuntimePointerHelper +{ + InitializeRuntimePointerHelper() + { + RhSetRuntimeInitializationCallback(&InitializeRuntime); + } +} initializeRuntimePointerHelper; +#endif // CORERT_DLL diff --git a/src/coreclr/src/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/src/nativeaot/Runtime/AsmOffsets.h new file mode 100644 index 0000000000000..0860aad4c8c2e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/AsmOffsets.h @@ -0,0 +1,121 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.cpp to validate that our +// assembly-code offsets always match their C++ counterparts. + +// You must #define PLAT_ASM_OFFSET and PLAT_ASM_SIZEOF before you #include this file + +#ifdef HOST_64BIT +#define ASM_OFFSET(offset32, offset64, cls, member) PLAT_ASM_OFFSET(offset64, cls, member) +#define ASM_SIZEOF(sizeof32, sizeof64, cls ) PLAT_ASM_SIZEOF(sizeof64, cls) +#define ASM_CONST(const32, const64, expr) PLAT_ASM_CONST(const64, expr) +#else +#define ASM_OFFSET(offset32, offset64, cls, member) PLAT_ASM_OFFSET(offset32, cls, member) +#define ASM_SIZEOF(sizeof32, sizeof64, cls ) PLAT_ASM_SIZEOF(sizeof32, cls) +#define ASM_CONST(const32, const64, expr) PLAT_ASM_CONST(const32, expr) +#endif + +// NOTE: the values MUST be in hex notation WITHOUT the 0x prefix + +// 32-bit,64-bit, constant symbol +ASM_CONST( 14c08, 14c08, RH_LARGE_OBJECT_SIZE) +ASM_CONST( 400, 800, CLUMP_SIZE) +ASM_CONST( a, b, LOG2_CLUMP_SIZE) + +// 32-bit,64-bit, class, member +ASM_OFFSET( 0, 0, Object, m_pEEType) + +ASM_OFFSET( 4, 8, Array, m_Length) + +ASM_OFFSET( 4, 8, String, m_Length) +ASM_OFFSET( 8, C, String, m_FirstChar) +ASM_CONST( 2, 2, STRING_COMPONENT_SIZE) +ASM_CONST( E, 16, STRING_BASE_SIZE) +ASM_CONST(3FFFFFDF,3FFFFFDF,MAX_STRING_LENGTH) + +ASM_OFFSET( 0, 0, EEType, m_usComponentSize) +ASM_OFFSET( 2, 2, EEType, m_usFlags) +ASM_OFFSET( 4, 4, EEType, m_uBaseSize) +ASM_OFFSET( 14, 18, EEType, m_VTable) + +ASM_OFFSET( 0, 0, Thread, m_rgbAllocContextBuffer) +ASM_OFFSET( 28, 38, Thread, m_ThreadStateFlags) +ASM_OFFSET( 2c, 40, Thread, m_pTransitionFrame) +ASM_OFFSET( 30, 48, Thread, m_pHackPInvokeTunnel) +ASM_OFFSET( 40, 68, Thread, m_ppvHijackedReturnAddressLocation) +ASM_OFFSET( 44, 70, Thread, m_pvHijackedReturnAddress) +#ifdef HOST_64BIT +ASM_OFFSET( 0, 78, Thread, m_uHijackedReturnValueFlags) +#endif +ASM_OFFSET( 48, 80, Thread, m_pExInfoStackHead) +ASM_OFFSET( 4c, 88, Thread, m_threadAbortException) + +ASM_SIZEOF( 14, 20, EHEnum) + +ASM_OFFSET( 0, 0, gc_alloc_context, alloc_ptr) +ASM_OFFSET( 4, 8, gc_alloc_context, alloc_limit) + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +ASM_OFFSET( 4, 8, InterfaceDispatchCell, m_pCache) +#ifndef HOST_64BIT +ASM_OFFSET( 8, 0, InterfaceDispatchCache, m_pCell) +#endif +ASM_OFFSET( 10, 20, InterfaceDispatchCache, m_rgEntries) +ASM_SIZEOF( 8, 10, InterfaceDispatchCacheEntry) +#endif + +ASM_OFFSET( 4, 8, StaticClassConstructionContext, m_initialized) + +#ifdef FEATURE_DYNAMIC_CODE +ASM_OFFSET( 0, 0, CallDescrData, pSrc) +ASM_OFFSET( 4, 8, CallDescrData, numStackSlots) +ASM_OFFSET( 8, C, CallDescrData, fpReturnSize) +ASM_OFFSET( C, 10, CallDescrData, pArgumentRegisters) +ASM_OFFSET( 10, 18, CallDescrData, pFloatArgumentRegisters) +ASM_OFFSET( 14, 20, CallDescrData, pTarget) +ASM_OFFSET( 18, 28, 
CallDescrData, pReturnBuffer) +#endif + +// Undefine macros that are only used in this header for convenience. +#undef ASM_OFFSET +#undef ASM_SIZEOF +#undef ASM_CONST + +// Define platform specific offsets +#include "AsmOffsetsCpu.h" + +//#define USE_COMPILE_TIME_CONSTANT_FINDER // Uncomment this line to use the constant finder +#if defined(__cplusplus) && defined(USE_COMPILE_TIME_CONSTANT_FINDER) +// This class causes the compiler to emit an error with the constant we're interested in +// in the error message. This is useful if a size or offset changes. To use, comment out +// the compile-time assert that is firing, enable the constant finder, add the appropriate +// constant to find to BogusFunction(), and build. +// +// Here's a sample compiler error: +// In file included from corert/src/Native/Runtime/AsmOffsetsVerify.cpp:38: +// corert/src/Native/Runtime/Full/../AsmOffsets.h:117:61: error: calling a private constructor of class +// 'AsmOffsets::FindCompileTimeConstant<25>' +// FindCompileTimeConstant bogus_variable; +// ^ +// corert/src/Native/Runtime/Full/../AsmOffsets.h:111:5: note: declared private here +// FindCompileTimeConstant(); +// ^ +template +class FindCompileTimeConstant +{ +private: + FindCompileTimeConstant(); +}; + +void BogusFunction() +{ + // Sample usage to generate the error + FindCompileTimeConstant bogus_variable; + FindCompileTimeConstant bogus_variable2; + FindCompileTimeConstant bogus_variable3; + FindCompileTimeConstant bogus_variable4; + FindCompileTimeConstant bogus_variable5; +} +#endif // defined(__cplusplus) && defined(USE_COMPILE_TIME_CONSTANT_FINDER) diff --git a/src/coreclr/src/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/src/nativeaot/Runtime/AsmOffsetsVerify.cpp new file mode 100644 index 0000000000000..6f4b87104e8d2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/AsmOffsetsVerify.cpp @@ -0,0 +1,48 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "gcenv.h" +#include "gcheaputilities.h" +#include "rhassert.h" +#include "RedhawkWarnings.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "TargetPtrs.h" +#include "rhbinder.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "CachedInterfaceDispatch.h" +#include "shash.h" +#include "CallDescr.h" + +class AsmOffsets +{ + static_assert(sizeof(Thread::m_rgbAllocContextBuffer) >= sizeof(gc_alloc_context), "Thread::m_rgbAllocContextBuffer is not big enough to hold a gc_alloc_context"); + + // Some assembly helpers for arrays and strings are shared and use the fact that arrays and strings have similar layouts) + static_assert(offsetof(Array, m_Length) == offsetof(String, m_Length), "The length field of String and Array have different offsets"); + static_assert(sizeof(((Array*)0)->m_Length) == sizeof(((String*)0)->m_Length), "The length field of String and Array have different sizes"); + +#define PLAT_ASM_OFFSET(offset, cls, member) \ + static_assert((offsetof(cls, member) == 0x##offset) || (offsetof(cls, member) > 0x##offset), "Bad asm offset for '" #cls "." #member "', the actual offset is smaller than 0x" #offset "."); \ + static_assert((offsetof(cls, member) == 0x##offset) || (offsetof(cls, member) < 0x##offset), "Bad asm offset for '" #cls "." 
#member "', the actual offset is larger than 0x" #offset "."); + +#define PLAT_ASM_SIZEOF(size, cls ) \ + static_assert((sizeof(cls) == 0x##size) || (sizeof(cls) > 0x##size), "Bad asm size for '" #cls "', the actual size is smaller than 0x" #size "."); \ + static_assert((sizeof(cls) == 0x##size) || (sizeof(cls) < 0x##size), "Bad asm size for '" #cls "', the actual size is larger than 0x" #size "."); + +#define PLAT_ASM_CONST(constant, expr) \ + static_assert(((expr) == 0x##constant) || ((expr) > 0x##constant), "Bad asm constant for '" #expr "', the actual value is smaller than 0x" #constant "."); \ + static_assert(((expr) == 0x##constant) || ((expr) < 0x##constant), "Bad asm constant for '" #expr "', the actual value is larger than 0x" #constant "."); + +#include "AsmOffsets.h" + +}; + +#ifdef _MSC_VER +namespace { char WorkaroundLNK4221Warning; }; +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.cpp b/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.cpp new file mode 100644 index 0000000000000..1a9a15aafda89 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.cpp @@ -0,0 +1,543 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// ==--== +// +// Shared (non-architecture specific) portions of a mechanism to perform interface dispatch using an alternate +// mechanism to VSD that does not require runtime generation of code. +// +// ============================================================================ +#include "common.h" +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "DebugMacrosExt.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "RedhawkWarnings.h" +#include "TargetPtrs.h" +#include "eetype.h" +#include "Range.h" +#include "allocheap.h" +#include "rhbinder.h" +#include "ObjectLayout.h" +#include "gcrhinterface.h" +#include "shash.h" +#include "RWLock.h" +#include "TypeManager.h" +#include "RuntimeInstance.h" +#include "eetype.inl" + +#include "CachedInterfaceDispatch.h" + +// We always allocate cache sizes with a power of 2 number of entries. We have a maximum size we support, +// defined below. +#define CID_MAX_CACHE_SIZE_LOG2 6 +#define CID_MAX_CACHE_SIZE (1 << CID_MAX_CACHE_SIZE_LOG2) + +//#define FEATURE_CID_STATS 1 + +#ifdef FEATURE_CID_STATS + +// Some counters used for debugging and profiling the algorithms. +extern "C" +{ + UInt32 CID_g_cLoadVirtFunc = 0; + UInt32 CID_g_cCacheMisses = 0; + UInt32 CID_g_cCacheSizeOverflows = 0; + UInt32 CID_g_cCacheOutOfMemory = 0; + UInt32 CID_g_cCacheReallocates = 0; + UInt32 CID_g_cCacheAllocates = 0; + UInt32 CID_g_cCacheDiscards = 0; + UInt32 CID_g_cInterfaceDispatches = 0; + UInt32 CID_g_cbMemoryAllocated = 0; + UInt32 CID_g_rgAllocatesBySize[CID_MAX_CACHE_SIZE_LOG2 + 1] = { 0 }; +}; + +#define CID_COUNTER_INC(_counter_name) CID_g_c##_counter_name++ + +#else + +#define CID_COUNTER_INC(_counter_name) + +#endif // FEATURE_CID_STATS + +// Helper function for updating two adjacent pointers (which are aligned on a double pointer-sized boundary) +// atomically. +// +// This is used to update interface dispatch cache entries and also the stub/cache pair in +// interface dispatch indirection cells. 
The cases have slightly different semantics: cache entry updates +// (fFailOnNonNull == true) require that the existing values in the location are both NULL whereas indirection +// cell updates have no such restriction. In both cases we'll try the update once; on failure we'll return the +// new value of the second pointer and on success we'll the old value of the second pointer. +// +// This suits the semantics of both callers. For indirection cell updates the caller needs to know the address +// of the cache that can now be scheduled for release and the cache pointer is the second one in the pair. For +// cache entry updates the caller only needs a success/failure indication: on success the return value will be +// NULL and on failure non-NULL. +static void * UpdatePointerPairAtomically(void * pPairLocation, + void * pFirstPointer, + void * pSecondPointer, + bool fFailOnNonNull) +{ +#if defined(HOST_64BIT) + // The same comments apply to the AMD64 version. The CompareExchange looks a little different since the + // API was refactored in terms of Int64 to avoid creating a 128-bit integer type. + + Int64 rgComparand[2] = { 0 , 0 }; + if (!fFailOnNonNull) + { + rgComparand[0] = *(Int64 volatile *)pPairLocation; + rgComparand[1] = *((Int64 volatile *)pPairLocation + 1); + } + + UInt8 bResult = PalInterlockedCompareExchange128((Int64*)pPairLocation, (Int64)pSecondPointer, (Int64)pFirstPointer, rgComparand); + if (bResult == 1) + { + // Success, return old value of second pointer (rgComparand is updated by + // PalInterlockedCompareExchange128 with the old pointer values in this case). + return (void*)rgComparand[1]; + } + + // Failure, return the new second pointer value. + return pSecondPointer; +#else + // Stuff the two pointers into a 64-bit value as the proposed new value for the CompareExchange64 below. + Int64 iNewValue = (Int64)((UInt64)(UIntNative)pFirstPointer | ((UInt64)(UIntNative)pSecondPointer << 32)); + + // Read the old value in the location. If fFailOnNonNull is set we just assume this was zero and we'll + // fail below if that's not the case. + Int64 iOldValue = fFailOnNonNull ? 0 : *(Int64 volatile *)pPairLocation; + + Int64 iUpdatedOldValue = PalInterlockedCompareExchange64((Int64*)pPairLocation, iNewValue, iOldValue); + if (iUpdatedOldValue == iOldValue) + { + // Successful update. Return the previous value of the second pointer. For cache entry updates + // (fFailOnNonNull == true) this is guaranteed to be NULL in this case and the result being being + // NULL in the success case is all the caller cares about. For indirection cell updates the second + // pointer represents the old cache and the caller needs this data so they can schedule the cache + // for deletion once it becomes safe to do so. + return (void*)(UInt32)(iOldValue >> 32); + } + + // The update failed due to a racing update to the same location. Return the new value of the second + // pointer (either a new cache that lost the race or a non-NULL pointer in the cache entry update case). + return pSecondPointer; +#endif // HOST_64BIT +} + +// Helper method for updating an interface dispatch cache entry atomically. See comments by the usage of +// this method for the details of why we need this. If a racing update is detected false is returned and the +// update abandoned. This is necessary since it's not safe to update a valid cache entry (one with a non-NULL +// m_pInstanceType field) outside of a GC. 
+static bool UpdateCacheEntryAtomically(InterfaceDispatchCacheEntry *pEntry, + EEType * pInstanceType, + void * pTargetCode) +{ + C_ASSERT(sizeof(InterfaceDispatchCacheEntry) == (sizeof(void*) * 2)); + C_ASSERT(offsetof(InterfaceDispatchCacheEntry, m_pInstanceType) < offsetof(InterfaceDispatchCacheEntry, m_pTargetCode)); + + return UpdatePointerPairAtomically(pEntry, pInstanceType, pTargetCode, true) == NULL; +} + +// Helper method for updating an interface dispatch indirection cell's stub and cache pointer atomically. +// Returns the value of the cache pointer that is not referenced by the cell after this operation. This can be +// NULL on the initial cell update, the value of the old cache pointer or the value of the new cache pointer +// supplied (in the case where another thread raced with us for the update and won). In any case, if the +// returned pointer is non-NULL it represents a cache that should be scheduled for release. +static InterfaceDispatchCache * UpdateCellStubAndCache(InterfaceDispatchCell * pCell, + void * pStub, + UIntNative newCacheValue) +{ + C_ASSERT(offsetof(InterfaceDispatchCell, m_pStub) == 0); + C_ASSERT(offsetof(InterfaceDispatchCell, m_pCache) == sizeof(void*)); + + UIntNative oldCacheValue = (UIntNative)UpdatePointerPairAtomically(pCell, pStub, (void*)newCacheValue, false); + + if (InterfaceDispatchCell::IsCache(oldCacheValue)) + { + return (InterfaceDispatchCache *)oldCacheValue; + } + else + { + return nullptr; + } +} + +// +// Cache allocation logic. +// +// We use the existing AllocHeap mechanism as our base allocator for cache blocks. This is because it can +// provide the required 16-byte alignment with no padding or heap header costs. The downside is that there is +// no deallocation support (which would be hard to implement without implementing a cache block compaction +// scheme, which is certainly possible but not necessarily needed at this point). +// +// Instead, much like the original VSD algorithm, we keep discarded cache blocks and use them to satisfy new +// allocation requests before falling back on AllocHeap. +// +// We can't re-use discarded cache blocks immediately since there may be code that is still using them. +// Instead we link them into a global list and then at the next GC (when no code can hold a reference to these +// any more) we can place them on one of several free lists based on their size. +// + +#if defined(HOST_AMD64) || defined(HOST_ARM64) + +// Head of the list of discarded cache blocks that can't be re-used just yet. +InterfaceDispatchCache * g_pDiscardedCacheList; // for AMD64 and ARM64, m_pCell is not used and we can link the discarded blocks themselves + +#else // defined(HOST_AMD64) || defined(HOST_ARM64) + +struct DiscardedCacheBlock +{ + DiscardedCacheBlock * m_pNext; // for x86 and ARM, we are short of registers, thus need the m_pCell back pointers + InterfaceDispatchCache * m_pCache; // and thus need this auxiliary list +}; + +// Head of the list of discarded cache blocks that can't be re-used just yet. +static DiscardedCacheBlock * g_pDiscardedCacheList = NULL; + +// Free list of DiscardedCacheBlock items +static DiscardedCacheBlock * g_pDiscardedCacheFree = NULL; + +#endif // defined(HOST_AMD64) || defined(HOST_ARM64) + +// Free lists for each cache size up to the maximum. We allocate from these in preference to new memory. +static InterfaceDispatchCache * g_rgFreeLists[CID_MAX_CACHE_SIZE_LOG2 + 1]; + +// Lock protecting both g_pDiscardedCacheList and g_rgFreeLists. 
We don't use the OS SLIST support here since +// it imposes too much space overhead on list entries on 64-bit (each is actually 16 bytes). +static CrstStatic g_sListLock; + +// The base memory allocator. +static AllocHeap * g_pAllocHeap = NULL; + +// Each cache size has an associated stub used to perform lookup over that cache. +extern "C" void RhpInterfaceDispatch1(); +extern "C" void RhpInterfaceDispatch2(); +extern "C" void RhpInterfaceDispatch4(); +extern "C" void RhpInterfaceDispatch8(); +extern "C" void RhpInterfaceDispatch16(); +extern "C" void RhpInterfaceDispatch32(); +extern "C" void RhpInterfaceDispatch64(); + +extern "C" void RhpVTableOffsetDispatch(); + +typedef void (*InterfaceDispatchStub)(); + +static void * g_rgDispatchStubs[CID_MAX_CACHE_SIZE_LOG2 + 1] = { + (void *)&RhpInterfaceDispatch1, + (void *)&RhpInterfaceDispatch2, + (void *)&RhpInterfaceDispatch4, + (void *)&RhpInterfaceDispatch8, + (void *)&RhpInterfaceDispatch16, + (void *)&RhpInterfaceDispatch32, + (void *)&RhpInterfaceDispatch64, +}; + +// Map a cache size into a linear index. +static UInt32 CacheSizeToIndex(UInt32 cCacheEntries) +{ + switch (cCacheEntries) + { + case 1: + return 0; + case 2: + return 1; + case 4: + return 2; + case 8: + return 3; + case 16: + return 4; + case 32: + return 5; + case 64: + return 6; + default: + UNREACHABLE(); + } +} + +// Allocates and initializes new cache of the given size. If given a previous version of the cache (guaranteed +// to be smaller) it will also pre-populate the new cache with the contents of the old. Additionally the +// address of the interface dispatch stub associated with this size of cache is returned. +static UIntNative AllocateCache(UInt32 cCacheEntries, InterfaceDispatchCache * pExistingCache, const DispatchCellInfo *pNewCellInfo, void ** ppStub) +{ + if (pNewCellInfo->CellType == DispatchCellType::VTableOffset) + { + ASSERT(pNewCellInfo->VTableOffset < InterfaceDispatchCell::IDC_MaxVTableOffsetPlusOne); + *ppStub = (void *)&RhpVTableOffsetDispatch; + ASSERT(!InterfaceDispatchCell::IsCache(pNewCellInfo->VTableOffset)); + return pNewCellInfo->VTableOffset; + } + + ASSERT((cCacheEntries >= 1) && (cCacheEntries <= CID_MAX_CACHE_SIZE)); + ASSERT((pExistingCache == NULL) || (pExistingCache->m_cEntries < cCacheEntries)); + + InterfaceDispatchCache * pCache = NULL; + + // Transform cache size back into a linear index. + UInt32 idxCacheSize = CacheSizeToIndex(cCacheEntries); + + // Attempt to allocate the head of the free list of the correct cache size. + if (g_rgFreeLists[idxCacheSize] != NULL) + { + CrstHolder lh(&g_sListLock); + + pCache = g_rgFreeLists[idxCacheSize]; + if (pCache != NULL) + { + g_rgFreeLists[idxCacheSize] = pCache->m_pNextFree; + CID_COUNTER_INC(CacheReallocates); + } + } + + if (pCache == NULL) + { + // No luck with the free list, allocate the cache from via the AllocHeap. + pCache = (InterfaceDispatchCache*)g_pAllocHeap->AllocAligned(sizeof(InterfaceDispatchCache) + + (sizeof(InterfaceDispatchCacheEntry) * cCacheEntries), + sizeof(void*) * 2); + if (pCache == NULL) + return NULL; + + CID_COUNTER_INC(CacheAllocates); +#ifdef FEATURE_CID_STATS + CID_g_cbMemoryAllocated += sizeof(InterfaceDispatchCacheEntry) * cCacheEntries; + CID_g_rgAllocatesBySize[idxCacheSize]++; +#endif + } + + // We have a cache block, now initialize it. + pCache->m_pNextFree = NULL; + pCache->m_cEntries = cCacheEntries; + pCache->m_cacheHeader.Initialize(pNewCellInfo); + + // Copy over entries from previous version of the cache (if any) and zero the rest. 
+ if (pExistingCache) + { + memcpy(pCache->m_rgEntries, + pExistingCache->m_rgEntries, + sizeof(InterfaceDispatchCacheEntry) * pExistingCache->m_cEntries); + memset(&pCache->m_rgEntries[pExistingCache->m_cEntries], + 0, + (cCacheEntries - pExistingCache->m_cEntries) * sizeof(InterfaceDispatchCacheEntry)); + } + else + { + memset(pCache->m_rgEntries, + 0, + cCacheEntries * sizeof(InterfaceDispatchCacheEntry)); + } + + // Pass back the stub the corresponds to this cache size. + *ppStub = g_rgDispatchStubs[idxCacheSize]; + + return (UIntNative)pCache; +} + +// Discards a cache by adding it to a list of caches that may still be in use but will be made available for +// re-allocation at the next GC. +static void DiscardCache(InterfaceDispatchCache * pCache) +{ + CID_COUNTER_INC(CacheDiscards); + + CrstHolder lh(&g_sListLock); + +#if defined(HOST_AMD64) || defined(HOST_ARM64) + + // on AMD64 and ARM64, we can thread the list through the blocks directly + pCache->m_pNextFree = g_pDiscardedCacheList; + g_pDiscardedCacheList = pCache; + +#else // defined(HOST_AMD64) || defined(HOST_ARM64) + + // on other architectures, we cannot overwrite pCache->m_pNextFree yet + // because it shares storage with m_pCell which may still be used as a back + // pointer to the dispatch cell. + + // instead, allocate an auxiliary node (with its own auxiliary free list) + DiscardedCacheBlock * pDiscardedCacheBlock = g_pDiscardedCacheFree; + if (pDiscardedCacheBlock != NULL) + g_pDiscardedCacheFree = pDiscardedCacheBlock->m_pNext; + else + pDiscardedCacheBlock = (DiscardedCacheBlock *)g_pAllocHeap->Alloc(sizeof(DiscardedCacheBlock)); + + if (pDiscardedCacheBlock != NULL) // if we did NOT get the memory, we leak the discarded block + { + pDiscardedCacheBlock->m_pNext = g_pDiscardedCacheList; + pDiscardedCacheBlock->m_pCache = pCache; + + g_pDiscardedCacheList = pDiscardedCacheBlock; + } +#endif // defined(HOST_AMD64) || defined(HOST_ARM64) +} + +// Called during a GC to empty the list of discarded caches (which we can now guarantee aren't being accessed) +// and sort the results into the free lists we maintain for each cache size. +void ReclaimUnusedInterfaceDispatchCaches() +{ + // No need for any locks, we're not racing with any other threads any more. + + // Walk the list of discarded caches. +#if defined(HOST_AMD64) || defined(HOST_ARM64) + + // on AMD64, this is threaded directly through the cache blocks + InterfaceDispatchCache * pCache = g_pDiscardedCacheList; + while (pCache) + { + InterfaceDispatchCache * pNextCache = pCache->m_pNextFree; + + // Transform cache size back into a linear index. + UInt32 idxCacheSize = CacheSizeToIndex(pCache->m_cEntries); + + // Insert the cache onto the head of the correct free list. + pCache->m_pNextFree = g_rgFreeLists[idxCacheSize]; + g_rgFreeLists[idxCacheSize] = pCache; + + pCache = pNextCache; + } + +#else // defined(HOST_AMD64) || defined(HOST_ARM64) + + // on other architectures, we use an auxiliary list instead + DiscardedCacheBlock * pDiscardedCacheBlock = g_pDiscardedCacheList; + while (pDiscardedCacheBlock) + { + InterfaceDispatchCache * pCache = pDiscardedCacheBlock->m_pCache; + + // Transform cache size back into a linear index. + UInt32 idxCacheSize = CacheSizeToIndex(pCache->m_cEntries); + + // Insert the cache onto the head of the correct free list. 
+ pCache->m_pNextFree = g_rgFreeLists[idxCacheSize]; + g_rgFreeLists[idxCacheSize] = pCache; + + // Insert the container to its own free list + DiscardedCacheBlock * pNextDiscardedCacheBlock = pDiscardedCacheBlock->m_pNext; + pDiscardedCacheBlock->m_pNext = g_pDiscardedCacheFree; + g_pDiscardedCacheFree = pDiscardedCacheBlock; + pDiscardedCacheBlock = pNextDiscardedCacheBlock; + } + +#endif // defined(HOST_AMD64) || defined(HOST_ARM64) + + // We processed all the discarded entries, so we can simply NULL the list head. + g_pDiscardedCacheList = NULL; +} + +// One time initialization of interface dispatch. +bool InitializeInterfaceDispatch() +{ + g_pAllocHeap = new AllocHeap(); + if (g_pAllocHeap == NULL) + return false; + + if (!g_pAllocHeap->Init()) + return false; + + g_sListLock.Init(CrstInterfaceDispatchGlobalLists, CRST_DEFAULT); + + return true; +} + +COOP_PINVOKE_HELPER(PTR_Code, RhpUpdateDispatchCellCache, (InterfaceDispatchCell * pCell, PTR_Code pTargetCode, EEType* pInstanceType, DispatchCellInfo *pNewCellInfo)) +{ + // Attempt to update the cache with this new mapping (if we have any cache at all, the initial state + // is none). + InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache(); + UInt32 cOldCacheEntries = 0; + if (pCache != NULL) + { + InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries; + for (UInt32 i = 0; i < pCache->m_cEntries; i++, pCacheEntry++) + { + if (pCacheEntry->m_pInstanceType == NULL) + { + if (UpdateCacheEntryAtomically(pCacheEntry, pInstanceType, pTargetCode)) + return (PTR_Code)pTargetCode; + } + } + + cOldCacheEntries = pCache->m_cEntries; + } + + // Failed to update an existing cache, we need to allocate a new cache. The old one, if any, might + // still be in use so we can't simply reclaim it. Instead we keep it around until the next GC at which + // point we know no code is holding a reference to it. Particular cache sizes are associated with a + // (globally shared) stub which implicitly knows the size of the cache. + + if (cOldCacheEntries == CID_MAX_CACHE_SIZE) + { + // We already reached the maximum cache size we wish to allocate. For now don't attempt to cache + // the mapping we just did: there's no safe way to update the existing cache right now if it + // doesn't have an empty entries. There are schemes that would let us do this at the next GC point + // but it's not clear whether we should do this or re-tune the cache max size, we need to measure + // this. + CID_COUNTER_INC(CacheSizeOverflows); + return (PTR_Code)pTargetCode; + } + + UInt32 cNewCacheEntries = cOldCacheEntries ? cOldCacheEntries * 2 : 1; + void *pStub; + UIntNative newCacheValue = AllocateCache(cNewCacheEntries, pCache, pNewCellInfo, &pStub); + if (newCacheValue == 0) + { + CID_COUNTER_INC(CacheOutOfMemory); + return (PTR_Code)pTargetCode; + } + + if (InterfaceDispatchCell::IsCache(newCacheValue)) + { + pCache = (InterfaceDispatchCache*)newCacheValue; +#if !defined(HOST_AMD64) && !defined(HOST_ARM64) + // Set back pointer to interface dispatch cell for non-AMD64 and non-ARM64 + // for AMD64 and ARM64, we have enough registers to make this trick unnecessary + pCache->m_pCell = pCell; +#endif // !defined(HOST_AMD64) && !defined(HOST_ARM64) + + // Add entry to the first unused slot. 
+ InterfaceDispatchCacheEntry * pCacheEntry = &pCache->m_rgEntries[cOldCacheEntries]; + pCacheEntry->m_pInstanceType = pInstanceType; + pCacheEntry->m_pTargetCode = pTargetCode; + } + + // Publish the new cache by atomically updating both the cache and stub pointers in the indirection + // cell. This returns us a cache to discard which may be NULL (no previous cache), the previous cache + // value or the cache we just allocated (another thread performed an update first). + InterfaceDispatchCache * pDiscardedCache = UpdateCellStubAndCache(pCell, pStub, newCacheValue); + if (pDiscardedCache) + DiscardCache(pDiscardedCache); + + return (PTR_Code)pTargetCode; +} + +COOP_PINVOKE_HELPER(PTR_Code, RhpSearchDispatchCellCache, (InterfaceDispatchCell * pCell, EEType* pInstanceType)) +{ + // This function must be implemented in native code so that we do not take a GC while walking the cache + InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache(); + if (pCache != NULL) + { + InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries; + for (UInt32 i = 0; i < pCache->m_cEntries; i++, pCacheEntry++) + if (pCacheEntry->m_pInstanceType == pInstanceType) + return (PTR_Code)pCacheEntry->m_pTargetCode; + } + + return nullptr; +} + +// Given a dispatch cell, get the type and slot associated with it. This function MUST be implemented +// in cooperative native code, as the m_pCache field on the cell is unsafe to access from managed +// code due to its use of the GC state as a lock, and as lifetime control +COOP_PINVOKE_HELPER(void, RhpGetDispatchCellInfo, (InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo)) +{ + *pDispatchCellInfo = pCell->GetDispatchCellInfo(); +} + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.h b/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.h new file mode 100644 index 0000000000000..173dddff5513a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/CachedInterfaceDispatch.h @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// ==--== +// +// Shared (non-architecture specific) portions of a mechanism to perform interface dispatch using an alternate +// mechanism to VSD that does not require runtime generation of code. +// +// ============================================================================ + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +bool InitializeInterfaceDispatch(); +void ReclaimUnusedInterfaceDispatchCaches(); + +// Interface dispatch caches contain an array of these entries. An instance of a cache is paired with a stub +// that implicitly knows how many entries are contained. These entries must be aligned to twice the alignment +// of a pointer due to the synchonization mechanism used to update them at runtime. +struct InterfaceDispatchCacheEntry +{ + EEType * m_pInstanceType; // Potential type of the object instance being dispatched on + void * m_pTargetCode; // Method to dispatch to if the actual instance type matches the above +}; + +// The interface dispatch cache itself. As well as the entries we include the cache size (since logic such as +// cache miss processing needs to determine this value in a synchronized manner, so it can't be contained in +// the owning interface dispatch indirection cell) and a list entry used to link the caches in one of a couple +// of lists related to cache reclamation. 
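// (Editor sketch, not part of this change.) A hypothetical helper that makes the
// flexible-array layout of the struct below concrete; AllocateCache in
// CachedInterfaceDispatch.cpp performs the equivalent computation when it calls
// AllocHeap::AllocAligned with 2 * sizeof(void*) alignment.
inline size_t InterfaceDispatchCacheByteSize(UInt32 cEntries)
{
    // Fixed header (cache header, free-list/back-pointer union, entry count)
    // followed by cEntries inline InterfaceDispatchCacheEntry slots.
    return sizeof(InterfaceDispatchCache) + cEntries * sizeof(InterfaceDispatchCacheEntry);
}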
+#pragma warning(push) +#pragma warning(disable:4200) // nonstandard extension used: zero-sized array in struct/union +struct InterfaceDispatchCell; +struct InterfaceDispatchCache +{ + InterfaceDispatchCacheHeader m_cacheHeader; + union + { + InterfaceDispatchCache * m_pNextFree; // next in free list +#ifndef HOST_AMD64 + InterfaceDispatchCell * m_pCell; // pointer back to interface dispatch cell - not used for AMD64 +#endif + }; + UInt32 m_cEntries; + InterfaceDispatchCacheEntry m_rgEntries[]; +}; +#pragma warning(pop) + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/src/nativeaot/Runtime/CallDescr.h b/src/coreclr/src/nativeaot/Runtime/CallDescr.h new file mode 100644 index 0000000000000..946b96d2c8e7c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/CallDescr.h @@ -0,0 +1,13 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +struct CallDescrData +{ + uint8_t* pSrc; + int numStackSlots; + int fpReturnSize; + uint8_t* pArgumentRegisters; + uint8_t* pFloatArgumentRegisters; + void* pTarget; + void* pReturnBuffer; +}; diff --git a/src/coreclr/src/nativeaot/Runtime/CommonMacros.h b/src/coreclr/src/nativeaot/Runtime/CommonMacros.h new file mode 100644 index 0000000000000..54cb099bb2f8c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/CommonMacros.h @@ -0,0 +1,228 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __COMMONMACROS_H__ +#define __COMMONMACROS_H__ + +#include "rhassert.h" + +#define EXTERN_C extern "C" +#define FASTCALL __fastcall +#define STDCALL __stdcall +#define REDHAWK_API +#define REDHAWK_CALLCONV __fastcall + +#ifdef _MSC_VER + +#define MSVC_SAVE_WARNING_STATE() __pragma(warning(push)) +#define MSVC_DISABLE_WARNING(warn_num) __pragma(warning(disable: warn_num)) +#define MSVC_RESTORE_WARNING_STATE() __pragma(warning(pop)) + +#else + +#define MSVC_SAVE_WARNING_STATE() +#define MSVC_DISABLE_WARNING(warn_num) +#define MSVC_RESTORE_WARNING_STATE() + +#endif // _MSC_VER + +#ifndef COUNTOF +template +char (*COUNTOF_helper(_CountofType (&_Array)[_SizeOfArray]))[_SizeOfArray]; +#define COUNTOF(_Array) sizeof(*COUNTOF_helper(_Array)) +#endif // COUNTOF + +#ifndef offsetof +#define offsetof(s,m) (UIntNative)( (IntNative)&reinterpret_cast((((s *)0)->m)) ) +#endif // offsetof + +#ifndef FORCEINLINE +#define FORCEINLINE __forceinline +#endif + +#ifndef NOINLINE +#ifdef _MSC_VER +#define NOINLINE __declspec(noinline) +#else +#define NOINLINE __attribute__((noinline)) +#endif +#endif + +#ifndef __GCENV_BASE_INCLUDED__ + +// +// This macro returns val rounded up as necessary to be a multiple of alignment; alignment must be a power of 2 +// +inline UIntNative ALIGN_UP(UIntNative val, UIntNative alignment); +template +inline T* ALIGN_UP(T* val, UIntNative alignment); + +inline UIntNative ALIGN_DOWN(UIntNative val, UIntNative alignment); +template +inline T* ALIGN_DOWN(T* val, UIntNative alignment); + +#endif // !__GCENV_BASE_INCLUDED__ + +inline bool IS_ALIGNED(UIntNative val, UIntNative alignment); +template +inline bool IS_ALIGNED(T* val, UIntNative alignment); + +#ifndef DACCESS_COMPILE + +#ifndef ZeroMemory +#define ZeroMemory(_dst, _size) memset((_dst), 0, (_size)) +#endif + +//------------------------------------------------------------------------------------------------- +// min/max + +#ifndef min +#define min(_a, _b) ((_a) < (_b) ? 
(_a) : (_b)) +#endif +#ifndef max +#define max(_a, _b) ((_a) < (_b) ? (_b) : (_a)) +#endif + +#endif // !DACCESS_COMPILE + +//------------------------------------------------------------------------------------------------- +// Platform-specific defines + +#if defined(HOST_AMD64) + +#define LOG2_PTRSIZE 3 +#define POINTER_SIZE 8 + +#elif defined(HOST_X86) + +#define LOG2_PTRSIZE 2 +#define POINTER_SIZE 4 + +#elif defined(HOST_ARM) + +#define LOG2_PTRSIZE 2 +#define POINTER_SIZE 4 + +#elif defined(HOST_ARM64) + +#define LOG2_PTRSIZE 3 +#define POINTER_SIZE 8 + +#elif defined (HOST_WASM) + +#define LOG2_PTRSIZE 2 +#define POINTER_SIZE 4 + +#else +#error Unsupported target architecture +#endif + +#ifndef __GCENV_BASE_INCLUDED__ +#if defined(HOST_AMD64) + +#define DATA_ALIGNMENT 8 +#define OS_PAGE_SIZE 0x1000 + +#elif defined(HOST_X86) + +#define DATA_ALIGNMENT 4 +#ifndef OS_PAGE_SIZE +#define OS_PAGE_SIZE 0x1000 +#endif + +#elif defined(HOST_ARM) + +#define DATA_ALIGNMENT 4 +#ifndef OS_PAGE_SIZE +#define OS_PAGE_SIZE 0x1000 +#endif + +#elif defined(HOST_ARM64) + +#define DATA_ALIGNMENT 8 +#ifndef OS_PAGE_SIZE +#define OS_PAGE_SIZE 0x1000 +#endif + +#elif defined(HOST_WASM) + +#define DATA_ALIGNMENT 4 +#ifndef OS_PAGE_SIZE +#define OS_PAGE_SIZE 0x4 +#endif + +#else +#error Unsupported target architecture +#endif +#endif // __GCENV_BASE_INCLUDED__ + +#if defined(TARGET_ARM) +#define THUMB_CODE 1 +#endif + +// +// Define an unmanaged function called from managed code that needs to execute in co-operative GC mode. (There +// should be very few of these, most such functions will be simply p/invoked). +// +#define COOP_PINVOKE_HELPER(_rettype, _method, _args) EXTERN_C REDHAWK_API _rettype __fastcall _method _args +#ifdef HOST_X86 +// We have helpers that act like memcpy and memset from the CRT, so they need to be __cdecl. 
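// (Editor sketch, not part of this change.) For a hypothetical helper name,
//     COOP_PINVOKE_HELPER(void, RhpExampleHelper, (Object * pObj))
// expands (REDHAWK_API being empty) to
//     extern "C" void __fastcall RhpExampleHelper (Object * pObj)
// while the __cdecl variant defined next differs only in the calling convention used on x86.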
+#define COOP_PINVOKE_CDECL_HELPER(_rettype, _method, _args) EXTERN_C REDHAWK_API _rettype __cdecl _method _args +#else +#define COOP_PINVOKE_CDECL_HELPER COOP_PINVOKE_HELPER +#endif + +#ifndef DACCESS_COMPILE +#define IN_DAC(x) +#define NOT_IN_DAC(x) x +#else +#define IN_DAC(x) x +#define NOT_IN_DAC(x) +#endif + +#define INLINE inline + +enum STARTUP_TIMELINE_EVENT_ID +{ + PROCESS_ATTACH_BEGIN = 0, + NONGC_INIT_COMPLETE, + GC_INIT_COMPLETE, + PROCESS_ATTACH_COMPLETE, + + NUM_STARTUP_TIMELINE_EVENTS +}; + +#ifdef PROFILE_STARTUP +extern unsigned __int64 g_startupTimelineEvents[NUM_STARTUP_TIMELINE_EVENTS]; +#define STARTUP_TIMELINE_EVENT(eventid) PalQueryPerformanceCounter((LARGE_INTEGER*)&g_startupTimelineEvents[eventid]); +#else // PROFILE_STARTUP +#define STARTUP_TIMELINE_EVENT(eventid) +#endif // PROFILE_STARTUP + +#ifndef C_ASSERT +#define C_ASSERT(e) static_assert(e, #e) +#endif // C_ASSERT + +#ifdef __llvm__ +#define DECLSPEC_THREAD __thread +#else // __llvm__ +#define DECLSPEC_THREAD __declspec(thread) +#endif // !__llvm__ + +#ifndef __GCENV_BASE_INCLUDED__ +#if !defined(_INC_WINDOWS) +#ifdef _WIN32 +// this must exactly match the typedef used by windows.h +typedef long HRESULT; +#else +typedef int32_t HRESULT; +#endif + +#define S_OK 0x0 +#define E_FAIL 0x80004005 + +#define UNREFERENCED_PARAMETER(P) (void)(P) +#endif // !defined(_INC_WINDOWS) +#endif // __GCENV_BASE_INCLUDED__ + +#endif // __COMMONMACROS_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/CommonMacros.inl b/src/coreclr/src/nativeaot/Runtime/CommonMacros.inl new file mode 100644 index 0000000000000..afc5032835ee1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/CommonMacros.inl @@ -0,0 +1,74 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __GCENV_BASE_INCLUDED__ + +// +// This macro returns val rounded up as necessary to be a multiple of alignment; alignment must be a power of 2 +// +inline UIntNative ALIGN_UP( UIntNative val, UIntNative alignment ) +{ + // alignment must be a power of 2 for this implementation to work (need modulo otherwise) + ASSERT( 0 == (alignment & (alignment - 1)) ); + UIntNative result = (val + (alignment - 1)) & ~(alignment - 1); + ASSERT( result >= val ); // check for overflow + + return result; +} + +template +inline T* ALIGN_UP(T* val, UIntNative alignment) +{ + return reinterpret_cast(ALIGN_UP(reinterpret_cast(val), alignment)); +} + +inline UIntNative ALIGN_DOWN( UIntNative val, UIntNative alignment ) +{ + // alignment must be a power of 2 for this implementation to work (need modulo otherwise) + ASSERT( 0 == (alignment & (alignment - 1)) ); + UIntNative result = val & ~(alignment - 1); + return result; +} + +template +inline T* ALIGN_DOWN(T* val, UIntNative alignment) +{ + return reinterpret_cast(ALIGN_DOWN(reinterpret_cast(val), alignment)); +} + +#endif // !__GCENV_BASE_INCLUDED__ + +inline bool IS_ALIGNED(UIntNative val, UIntNative alignment) +{ + ASSERT(0 == (alignment & (alignment - 1))); + return 0 == (val & (alignment - 1)); +} + +template +inline bool IS_ALIGNED(T* val, UIntNative alignment) +{ + ASSERT(0 == (alignment & (alignment - 1))); + return IS_ALIGNED(reinterpret_cast(val), alignment); +} + +// Convert from a PCODE to the corresponding PINSTR. On many architectures this will be the identity function; +// on ARM, this will mask off the THUMB bit. 
+inline TADDR PCODEToPINSTR(PCODE pc) +{ +#ifdef TARGET_ARM + return dac_cast(pc & ~THUMB_CODE); +#else + return dac_cast(pc); +#endif +} + +// Convert from a PINSTR to the corresponding PCODE. On many architectures this will be the identity function; +// on ARM, this will raise the THUMB bit. +inline PCODE PINSTRToPCODE(TADDR addr) +{ +#ifdef TARGET_ARM + return dac_cast(addr | THUMB_CODE); +#else + return dac_cast(addr); +#endif +} diff --git a/src/coreclr/src/nativeaot/Runtime/Crst.cpp b/src/coreclr/src/nativeaot/Runtime/Crst.cpp new file mode 100644 index 0000000000000..1fe0fdb6817e0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/Crst.cpp @@ -0,0 +1,70 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "holder.h" +#include "Crst.h" + +void CrstStatic::Init(CrstType eType, CrstFlags eFlags) +{ + UNREFERENCED_PARAMETER(eType); + UNREFERENCED_PARAMETER(eFlags); +#ifndef DACCESS_COMPILE +#if defined(_DEBUG) + m_uiOwnerId.Clear(); +#endif // _DEBUG + PalInitializeCriticalSectionEx(&m_sCritSec, 0, 0); +#endif // !DACCESS_COMPILE +} + +void CrstStatic::Destroy() +{ +#ifndef DACCESS_COMPILE + PalDeleteCriticalSection(&m_sCritSec); +#endif // !DACCESS_COMPILE +} + +// static +void CrstStatic::Enter(CrstStatic *pCrst) +{ +#ifndef DACCESS_COMPILE + PalEnterCriticalSection(&pCrst->m_sCritSec); +#if defined(_DEBUG) + pCrst->m_uiOwnerId.SetToCurrentThread(); +#endif // _DEBUG +#else + UNREFERENCED_PARAMETER(pCrst); +#endif // !DACCESS_COMPILE +} + +// static +void CrstStatic::Leave(CrstStatic *pCrst) +{ +#ifndef DACCESS_COMPILE +#if defined(_DEBUG) + pCrst->m_uiOwnerId.Clear(); +#endif // _DEBUG + PalLeaveCriticalSection(&pCrst->m_sCritSec); +#else + UNREFERENCED_PARAMETER(pCrst); +#endif // !DACCESS_COMPILE +} + +#if defined(_DEBUG) +bool CrstStatic::OwnedByCurrentThread() +{ +#ifndef DACCESS_COMPILE + return m_uiOwnerId.IsCurrentThread(); +#else + return false; +#endif +} + +EEThreadId CrstStatic::GetHolderThreadId() +{ + return m_uiOwnerId; +} +#endif // _DEBUG diff --git a/src/coreclr/src/nativeaot/Runtime/Crst.h b/src/coreclr/src/nativeaot/Runtime/Crst.h new file mode 100644 index 0000000000000..658b0186429d0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/Crst.h @@ -0,0 +1,127 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// ----------------------------------------------------------------------------------------------------------- +// +// Minimal Crst implementation based on CRITICAL_SECTION. Doesn't support much except for the basic locking +// functionality (in particular there is no rank violation checking). +// + +enum CrstType +{ + CrstHandleTable, + CrstDispatchCache, + CrstAllocHeap, + CrstGenericInstHashtab, + CrstMemAccessMgr, + CrstInterfaceDispatchGlobalLists, + CrstStressLog, + CrstRestrictedCallouts, + CrstGcStressControl, + CrstSuspendEE, + CrstCastCache, + CrstYieldProcessorNormalized, +}; + +enum CrstFlags +{ + CRST_DEFAULT = 0x0, + CRST_REENTRANCY = 0x0, + CRST_UNSAFE_SAMELEVEL = 0x0, + CRST_UNSAFE_ANYMODE = 0x0, + CRST_DEBUGGER_THREAD = 0x0, +}; + +// Static version of Crst with no default constructor (user must call Init() before use). 
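Crst.cpp above implements a thin wrapper over a PAL critical section that must be Init()'d before use, and the CrstHolder declared a little further down in this header applies the usual RAII enter-in-constructor / leave-in-destructor shape to it. A minimal standalone sketch (not part of the patch) of that holder pattern, with std::mutex standing in for the PAL critical section and LockSketch / LockHolderSketch as illustrative names:

#include <mutex>

class LockSketch
{
public:
    void Enter() { m_mutex.lock(); }
    void Leave() { m_mutex.unlock(); }

private:
    std::mutex m_mutex;
};

class LockHolderSketch
{
public:
    explicit LockHolderSketch(LockSketch* pLock) : m_pLock(pLock) { m_pLock->Enter(); }
    ~LockHolderSketch() { m_pLock->Leave(); }   // released even if the scope exits via an exception

    LockHolderSketch(const LockHolderSketch&) = delete;
    LockHolderSketch& operator=(const LockHolderSketch&) = delete;

private:
    LockSketch* m_pLock;
};

static LockSketch g_lock;

int main()
{
    {
        LockHolderSketch holder(&g_lock);       // Enter() in the constructor
        // ... critical-section work ...
    }                                           // Leave() in the destructor at scope exit
    return 0;
}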
+class CrstStatic +{ +public: + void Init(CrstType eType, CrstFlags eFlags = CRST_DEFAULT); + bool InitNoThrow(CrstType eType, CrstFlags eFlags = CRST_DEFAULT) { Init(eType, eFlags); return true; } + void Destroy(); + void Enter() { CrstStatic::Enter(this); } + void Leave() { CrstStatic::Leave(this); } + static void Enter(CrstStatic *pCrst); + static void Leave(CrstStatic *pCrst); +#if defined(_DEBUG) + bool OwnedByCurrentThread(); + EEThreadId GetHolderThreadId(); +#endif // _DEBUG + +private: + CRITICAL_SECTION m_sCritSec; +#if defined(_DEBUG) + EEThreadId m_uiOwnerId; +#endif // _DEBUG +}; + +// Non-static version that will initialize itself during construction. +class Crst : public CrstStatic +{ +public: + Crst(CrstType eType, CrstFlags eFlags = CRST_DEFAULT) + : CrstStatic() + { Init(eType, eFlags); } +}; + +// Holder for a Crst instance. +class CrstHolder +{ + CrstStatic * m_pLock; + +public: + CrstHolder(CrstStatic * pLock) + : m_pLock(pLock) + { + m_pLock->Enter(); + } + + ~CrstHolder() + { + m_pLock->Leave(); + } +}; + +class CrstHolderWithState +{ + CrstStatic * m_pLock; + bool m_fAcquired; + +public: + CrstHolderWithState(CrstStatic * pLock, bool fAcquire = true) + : m_pLock(pLock), m_fAcquired(fAcquire) + { + if (fAcquire) + m_pLock->Enter(); + } + + ~CrstHolderWithState() + { + if (m_fAcquired) + m_pLock->Leave(); + } + + void Acquire() + { + if (!m_fAcquired) + { + m_pLock->Enter(); + m_fAcquired = true; + } + } + + void Release() + { + if (m_fAcquired) + { + m_pLock->Leave(); + m_fAcquired = false; + } + } + + CrstStatic * GetValue() + { + return m_pLock; + } +}; diff --git a/src/coreclr/src/nativeaot/Runtime/Debug.h b/src/coreclr/src/nativeaot/Runtime/Debug.h new file mode 100644 index 0000000000000..79c9e5924eb36 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/Debug.h @@ -0,0 +1,101 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#pragma once + +// The following definitions are required for interop with the VS Debugger +// Prior to making any changes to these, please reach out to the VS Debugger +// team to make sure that your changes are not going to prevent the debugger +// from working. + +enum FuncEvalEntryPointMode : uint32_t +{ + FixedAddress = 0, + VirtualMethodSlotOnly = 1, + InterfaceDispatch = 2, +}; + +enum FuncEvalMode : uint32_t +{ + CallParameterizedFunction = 1, + NewStringWithLength = 2, + NewParameterizedArray = 3, + NewParameterizedObjectNoConstructor = 4, + NewParameterizedObject = 5, +}; + +enum DebuggerGcProtectionRequestKind : uint16_t +{ + EnsureConservativeReporting = 1, + RemoveConservativeReporting = 2, + EnsureHandle = 3, + RemoveHandle = 4 +}; + +/** + * This structure represents a request from the debugger to perform a GC protection related work. + */ +struct DebuggerGcProtectionRequest +{ + DebuggerGcProtectionRequestKind kind; + union + { + uint16_t size; + uint16_t type; + }; + uint32_t identifier; + uint64_t address; + uint64_t payload; /* TODO, FuncEval, what would be a better name for this? 
*/ +}; + +enum DebuggerResponseKind : uint32_t +{ + FuncEvalCompleteWithReturn = 0, + FuncEvalCompleteWithException = 1, + FuncEvalParameterBufferReady = 2, + RequestBufferReady = 3, + ConservativeReportingBufferReady = 4, + HandleReady = 5, + FuncEvalCrossThreadDependency = 6, +}; + +struct DebuggerResponse +{ + DebuggerResponseKind kind; +}; + +struct DebuggerGcProtectionResponse +{ + DebuggerResponseKind kind; + uint32_t padding; + uint64_t bufferAddress; +}; + +struct DebuggerGcProtectionHandleReadyResponse +{ + DebuggerResponseKind kind; + uint32_t padding; + uint64_t payload; + uint64_t handle; +}; + +struct DebuggerFuncEvalCompleteWithReturnResponse +{ + DebuggerResponseKind kind; + uint32_t returnHandleIdentifier; + uint64_t returnAddress; +}; + +struct DebuggerFuncEvalParameterBufferReadyResponse +{ + DebuggerResponseKind kind; + uint32_t padding; + uint64_t bufferAddress; +}; + +struct DebuggerFuncEvalCrossThreadDependencyNotification +{ + DebuggerResponseKind kind; + uint32_t padding; + uint64_t payload; +}; diff --git a/src/coreclr/src/nativeaot/Runtime/DebugEventSource.cpp b/src/coreclr/src/nativeaot/Runtime/DebugEventSource.cpp new file mode 100644 index 0000000000000..21ed91ff9598e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebugEventSource.cpp @@ -0,0 +1,209 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "type_traits.hpp" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "gcrhinterface.h" +#include "shash.h" +#include "DebugEventSource.h" + +#include "slist.inl" + +#include "DebugEvents.h" + +GVAL_IMPL_INIT(UInt32, g_DebuggerEventsFilter, 0); + +#ifndef DACCESS_COMPILE + +bool EventEnabled(DebugEventType eventType) +{ + return ((int)eventType > 0) && + ((g_DebuggerEventsFilter & (1 << ((int)eventType-1))) != 0); +} + +void DebugEventSource::SendModuleLoadEvent(void* pAddressInModule) +{ + if(!EventEnabled(DEBUG_EVENT_TYPE_LOAD_MODULE)) + return; + DebugEventPayload payload; + payload.type = DEBUG_EVENT_TYPE_LOAD_MODULE; + payload.ModuleLoadUnload.pModuleHeader = (CORDB_ADDRESS)pAddressInModule; + SendRawEvent(&payload); +} + +void DebugEventSource::SendExceptionThrownEvent(CORDB_ADDRESS faultingIP, CORDB_ADDRESS faultingFrameSP) +{ + if(!EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_THROWN)) + return; + DebugEventPayload payload; + payload.type = DEBUG_EVENT_TYPE_EXCEPTION_THROWN; + payload.Exception.ip = faultingIP; + payload.Exception.sp = faultingFrameSP; + SendRawEvent(&payload); +} + +void DebugEventSource::SendExceptionCatchHandlerFoundEvent(CORDB_ADDRESS handlerIP, CORDB_ADDRESS HandlerFrameSP) +{ + if(!EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_CATCH_HANDLER_FOUND)) + return; + DebugEventPayload payload; + payload.type = DEBUG_EVENT_TYPE_EXCEPTION_CATCH_HANDLER_FOUND; + payload.Exception.ip = handlerIP; + payload.Exception.sp = HandlerFrameSP; + SendRawEvent(&payload); +} + +void DebugEventSource::SendExceptionUnhandledEvent() +{ + if(!EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_UNHANDLED)) + return; + DebugEventPayload payload; + payload.type = DEBUG_EVENT_TYPE_EXCEPTION_UNHANDLED; + payload.Exception.ip = (CORDB_ADDRESS)0; + payload.Exception.sp = (CORDB_ADDRESS)0; + SendRawEvent(&payload); +} + +void 
DebugEventSource::SendExceptionFirstPassFrameEnteredEvent(CORDB_ADDRESS ipInFrame, CORDB_ADDRESS frameSP) +{ + if(!EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_FIRST_PASS_FRAME_ENTER)) + return; + DebugEventPayload payload; + payload.type = DEBUG_EVENT_TYPE_EXCEPTION_FIRST_PASS_FRAME_ENTER; + payload.Exception.ip = ipInFrame; + payload.Exception.sp = frameSP; + SendRawEvent(&payload); +} + +void DebugEventSource::SendCustomEvent(void* payload, int length) +{ + if (!EventEnabled(DEBUG_EVENT_TYPE_CUSTOM)) + return; + DebugEventPayload rawPayload; + rawPayload.type = DEBUG_EVENT_TYPE_CUSTOM; + rawPayload.Custom.payload = (CORDB_ADDRESS)payload; + rawPayload.Custom.length = length; + SendRawEvent(&rawPayload); +} + +//--------------------------------------------------------------------------------------- +// +// Sends a raw managed debug event to the debugger. +// +// Arguments: +// pPayload - managed debug event data +// +// +// Notes: +// The entire process will get frozen by the debugger once we send. The debugger +// needs to resume the process. It may detach as well. +// See CordbProcess::DecodeEvent in mscordbi for decoding this event. These methods must stay in sync. +// +//--------------------------------------------------------------------------------------- +void DebugEventSource::SendRawEvent(DebugEventPayload* pPayload) +{ +#ifdef _MSC_VER + // We get to send an array of void* as data with the notification. + // The debugger can then use ReadProcessMemory to read through this array. + UInt64 rgData [] = { + (UInt64) CLRDBG_EXCEPTION_DATA_CHECKSUM, + (UInt64) GetRuntimeInstance()->GetPalInstance(), + (UInt64) pPayload + }; + + // + // Physically send the event via an OS Exception. We're using exceptions as a notification + // mechanism on top of the OS native debugging pipeline. + // + __try + { + const UInt32 dwFlags = 0; // continuable (eg, Debugger can continue GH) + // RaiseException treats arguments as pointer sized values, but we encoded 3 QWORDS. + // On 32 bit platforms we have 6 elements, on 64 bit platforms we have 3 elements + RaiseException(CLRDBG_NOTIFICATION_EXCEPTION_CODE, dwFlags, 3*sizeof(UInt64)/sizeof(UInt32*), (UInt32*)rgData); + + // If debugger continues "GH" (DBG_CONTINUE), then we land here. + // This is the expected path for a well-behaved ICorDebug debugger. 
+ } + __except(1) + { + // We can get here if: + // An ICorDebug aware debugger enabled the debug events AND + // a) the debugger detached during the event OR + // b) the debugger continues "GN" (DBG_EXCEPTION_NOT_HANDLED) - this would be considered a badly written debugger + // + // there is no great harm in reaching here but it is a needless perf-cost + } +#endif // _MSC_VER +} + +//keep these synced with the enumeration in exceptionhandling.cs +enum ExceptionEventKind +{ + EEK_Thrown=1, + EEK_CatchHandlerFound=2, + EEK_Unhandled=4, + EEK_FirstPassFrameEntered=8 +}; + +//Called by the C# exception dispatch code with events to send to the debugger +EXTERN_C REDHAWK_API void __cdecl RhpSendExceptionEventToDebugger(ExceptionEventKind eventKind, void* ip, void* sp) +{ + CORDB_ADDRESS cordbIP = (CORDB_ADDRESS)ip; + CORDB_ADDRESS cordbSP = (CORDB_ADDRESS)sp; +#if HOST_ARM + // clear the THUMB-bit from IP + cordbIP &= ~1; +#endif + + if(eventKind == EEK_Thrown) + { + DebugEventSource::SendExceptionThrownEvent(cordbIP, cordbSP); + } + else if(eventKind == EEK_CatchHandlerFound) + { + DebugEventSource::SendExceptionCatchHandlerFoundEvent(cordbIP, cordbSP); + } + else if(eventKind == EEK_Unhandled) + { + DebugEventSource::SendExceptionUnhandledEvent(); + } + else if(eventKind == EEK_FirstPassFrameEntered) + { + DebugEventSource::SendExceptionFirstPassFrameEnteredEvent(cordbIP, cordbSP); + } +} + +// Called to cache the current events the debugger is listening for in the C# implemented exception layer +// Filtering in managed code prevents making unneeded p/invokes +COOP_PINVOKE_HELPER(ExceptionEventKind, RhpGetRequestedExceptionEvents, ()) +{ + int mask = 0; + if(EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_THROWN)) + mask |= EEK_Thrown; + if(EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_CATCH_HANDLER_FOUND)) + mask |= EEK_CatchHandlerFound; + if(EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_UNHANDLED)) + mask |= EEK_Unhandled; + if(EventEnabled(DEBUG_EVENT_TYPE_EXCEPTION_FIRST_PASS_FRAME_ENTER)) + mask |= EEK_FirstPassFrameEntered; + return (ExceptionEventKind)mask; +} + +//Called by the C# func eval code to hand shake with the debugger +COOP_PINVOKE_HELPER(void, RhpSendCustomEventToDebugger, (void* payload, int length)) +{ + DebugEventSource::SendCustomEvent(payload, length); +} + +#endif //!DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/DebugEventSource.h b/src/coreclr/src/nativeaot/Runtime/DebugEventSource.h new file mode 100644 index 0000000000000..51ea208f6b7ef --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebugEventSource.h @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// ----------------------------------------------------------------------------------------------------------- +// Support for emitting debug events with particular payloads that a managed-aware debugger can listen for. +// The events are generated using 1st chance SEH exceptions that the debugger should immediately continue +// so the exception never dispatches back into runtime code. However just in case the debugger disconnects +// or doesn't behave well we've got a backstop catch handler that will prevent it from escaping the code in +// DebugEventSource. +// ----------------------------------------------------------------------------------------------------------- + +#ifndef __DEBUG_EVENT_SOURCE_H_ +#define __DEBUG_EVENT_SOURCE_H_ + +// This global is set from out of process using the debugger. 
It controls which events are emitted. +GVAL_DECL(UInt32, g_DebuggerEventsFilter); + +typedef UInt64 CORDB_ADDRESS; + +#ifndef DACCESS_COMPILE + +struct DebugEventPayload; + +class DebugEventSource +{ +public: + static void SendModuleLoadEvent(void* addressInModule); + static void SendExceptionThrownEvent(CORDB_ADDRESS faultingIP, CORDB_ADDRESS faultingFrameSP); + static void SendExceptionCatchHandlerFoundEvent(CORDB_ADDRESS handlerIP, CORDB_ADDRESS HandlerFrameSP); + static void SendExceptionUnhandledEvent(); + static void SendExceptionFirstPassFrameEnteredEvent(CORDB_ADDRESS ipInFrame, CORDB_ADDRESS frameSP); + static void SendCustomEvent(void* payload, int length); +private: + static void SendRawEvent(DebugEventPayload* payload); +}; + + +#endif //!DACCESS_COMPILE + + +#endif // __DEBUG_EVENT_SOURCE_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.cpp b/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.cpp new file mode 100644 index 0000000000000..cf9764b2c7f95 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.cpp @@ -0,0 +1,88 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "CommonTypes.h" +#include "DebugFuncEval.h" +#include "rhassert.h" +#include "RWLock.h" +#include "slist.h" +#include "RuntimeInstance.h" + +GVAL_IMPL_INIT(UInt32, g_FuncEvalMode, 0); +GVAL_IMPL_INIT(UInt32, g_FuncEvalParameterBufferSize, 0); +GVAL_IMPL_INIT(UInt64, g_MostRecentFuncEvalHijackInstructionPointer, 0); + +#ifndef DACCESS_COMPILE + +/* static */ UInt32 DebugFuncEval::GetFuncEvalParameterBufferSize() +{ + return g_FuncEvalParameterBufferSize; +} + +/* static */ UInt32 DebugFuncEval::GetFuncEvalMode() +{ + return g_FuncEvalMode; +} + +/* static */ UInt64 DebugFuncEval::GetMostRecentFuncEvalHijackInstructionPointer() +{ + return g_MostRecentFuncEvalHijackInstructionPointer; +} + +/// +/// Retrieve the global FuncEval parameter buffer size. +/// +/// +/// During debugging, if a FuncEval is requested, +/// the func eval infrastructure needs to know how much buffer to allocate for the debugger to +/// write the parameter information in. The C# supporting code will call this API to obtain the +/// buffer size. By that time, the value should have been set through the UpdateFuncEvalParameterBufferSize() +/// method on the ISosRedhawk7 interface. +/// +EXTERN_C REDHAWK_API UInt32 __cdecl RhpGetFuncEvalParameterBufferSize() +{ + return DebugFuncEval::GetFuncEvalParameterBufferSize(); +} + +/// +/// Retrieve the global FuncEval mode. +/// +/// +/// During debugging, if a FuncEval is requested, +/// the func eval infrastructure needs to know what mode to execute the FuncEval request +/// The C# supporting code will call this API to obtain the mode. By that time, the value +/// should have been set through the UpdateFuncEvalMode() method on the ISosRedhawk7 interface. +/// +EXTERN_C REDHAWK_API UInt32 __cdecl RhpGetFuncEvalMode() +{ + return DebugFuncEval::GetFuncEvalMode(); +} + +/// +/// Initiate the func eval abort +/// +/// +/// This is the entry point of FuncEval abort +/// When the debugger decides to abort the FuncEval, it will create a remote thread calling this function. +/// This function will call back into the DebugFuncEvalAbortHelper to perform the abort. 
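RhpGetFuncEvalMode() above hands the raw g_FuncEvalMode value back to the C# func-eval support code; the values correspond to the FuncEvalMode enum declared earlier in Debug.h. A minimal standalone sketch (not part of the patch) of how a consumer might decode that raw UInt32; the DescribeMode helper and its strings are invented for illustration:

#include <cstdint>
#include <cstdio>

// Mirrors the FuncEvalMode values from Debug.h above.
enum class FuncEvalMode : uint32_t
{
    CallParameterizedFunction           = 1,
    NewStringWithLength                 = 2,
    NewParameterizedArray               = 3,
    NewParameterizedObjectNoConstructor = 4,
    NewParameterizedObject              = 5,
};

static const char* DescribeMode(uint32_t rawMode)
{
    switch (static_cast<FuncEvalMode>(rawMode))
    {
    case FuncEvalMode::CallParameterizedFunction:           return "call a function";
    case FuncEvalMode::NewStringWithLength:                 return "allocate a string";
    case FuncEvalMode::NewParameterizedArray:               return "allocate an array";
    case FuncEvalMode::NewParameterizedObjectNoConstructor: return "allocate an object without running a constructor";
    case FuncEvalMode::NewParameterizedObject:              return "allocate and construct an object";
    default:                                                return "unknown / no func eval pending";
    }
}

int main()
{
    std::printf("%s\n", DescribeMode(2));   // prints "allocate a string"
    return 0;
}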
+EXTERN_C REDHAWK_API void __cdecl RhpInitiateFuncEvalAbort(void* pointerFromDebugger) +{ + DebugFuncEvalAbortHelperFunctionType debugFuncEvalAbortHelperFunction = (DebugFuncEvalAbortHelperFunctionType)GetRuntimeInstance()->GetClasslibFunctionFromCodeAddress((void*)g_MostRecentFuncEvalHijackInstructionPointer, ClasslibFunctionId::DebugFuncEvalAbortHelper); + ASSERT(debugFuncEvalAbortHelperFunction != nullptr); + debugFuncEvalAbortHelperFunction((Int64)pointerFromDebugger); +} + +#else + +UInt64 DebugFuncEval::GetMostRecentFuncEvalHijackInstructionPointer() +{ + return g_MostRecentFuncEvalHijackInstructionPointer; +} + +#endif //!DACCESS_COMPILE + +EXTERN_C void RhpDebugFuncEvalHelper(void*, void*); +GPTR_IMPL_INIT(PTR_VOID, g_RhpDebugFuncEvalHelperAddr, (void **)(&RhpDebugFuncEvalHelper)); + +GPTR_IMPL_INIT(PTR_VOID, g_RhpInitiateFuncEvalAbortAddr, (void**)&RhpInitiateFuncEvalAbort); diff --git a/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.h b/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.h new file mode 100644 index 0000000000000..7ddac1cf95963 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebugFuncEval.h @@ -0,0 +1,72 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// ----------------------------------------------------------------------------------------------------------- +// Support for evaluating expression in the debuggee during debugging +// ----------------------------------------------------------------------------------------------------------- + +#ifndef __DEBUG_FUNC_EVAL_H__ +#define __DEBUG_FUNC_EVAL_H__ + +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" + +#ifndef DACCESS_COMPILE + +typedef void(*DebugFuncEvalAbortHelperFunctionType)(UInt64); + +class DebugFuncEval +{ +public: + /// + /// Retrieve the global FuncEval parameter buffer size. + /// + /// + /// During debugging, if a FuncEval is requested, + /// the func eval infrastructure needs to know how much buffer to allocate for the debugger to + /// write the parameter information in. The C# supporting code will call this API to obtain the + /// buffer size. By that time, the value should have been set through the UpdateFuncEvalParameterSize() + /// method on the ISosRedhawk7 interface. + /// + static UInt32 GetFuncEvalParameterBufferSize(); + + /// + /// Retrieve the global FuncEval mode. + /// + /// + /// During debugging, if a FuncEval is requested, + /// the func eval infrastructure needs to know what mode to execute the FuncEval request + /// The C# supporting code will call this API to obtain the mode. By that time, the value + /// should have been set through the UpdateFuncEvalMode() method on the ISosRedhawk7 interface. 
+ /// + static UInt32 GetFuncEvalMode(); + + /// + /// Retrieve the most recent FuncEval Hijack instruction pointer + /// + /// + /// The most recent FuncEval Hijack instruction pointer is set through the debugger + /// It is used for the stack walker to understand the hijack frame + /// + static UInt64 GetMostRecentFuncEvalHijackInstructionPointer(); +}; + +#else + +class DebugFuncEval +{ +public: + /// + /// Retrieve the most recent FuncEval Hijack instruction pointer + /// + /// + /// The most recent FuncEval Hijack instruction pointer is set through the debugger + /// It is used for the stack walker to understand the hijack frame + /// + static UInt64 GetMostRecentFuncEvalHijackInstructionPointer(); +}; + +#endif //!DACCESS_COMPILE + +#endif // __DEBUG_FUNC_EVAL_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/DebuggerHook.cpp b/src/coreclr/src/nativeaot/Runtime/DebuggerHook.cpp new file mode 100644 index 0000000000000..0138ac0d156a9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebuggerHook.cpp @@ -0,0 +1,235 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "gcrhinterface.h" +#include "DebuggerHook.h" +#include "DebugEventSource.h" + +GVAL_IMPL_INIT(UInt32, g_numGcProtectionRequests, 0); + +#ifndef DACCESS_COMPILE + +/* static */ DebuggerProtectedBufferListNode* DebuggerHook::s_debuggerProtectedBuffers = nullptr; + +/* static */ DebuggerOwnedHandleListNode* DebuggerHook::s_debuggerOwnedHandles = nullptr; + +/* static */ UInt32 DebuggerHook::s_debuggeeInitiatedHandleIdentifier = 2; + +/* static */ void DebuggerHook::OnBeforeGcCollection() +{ + if (g_numGcProtectionRequests > 0) + { + // The debugger has some requests with respect to GC protection. + // Here we are allocating a buffer to store them + DebuggerGcProtectionRequest* requests = new (nothrow) DebuggerGcProtectionRequest[g_numGcProtectionRequests]; + + // Notifying the debugger the buffer is ready to use + DebuggerGcProtectionResponse response; + response.kind = DebuggerResponseKind::RequestBufferReady; + response.bufferAddress = (uint64_t)requests; + DebugEventSource::SendCustomEvent((void*)&response, sizeof(response)); + + // ... debugger magic happen here ... + + // The debugger has filled the requests array + for (uint32_t i = 0; i < g_numGcProtectionRequests; i++) + { + if (requests[i].kind == DebuggerGcProtectionRequestKind::EnsureConservativeReporting) + { + // If the request requires extra memory, allocate for it + requests[i].address = (uint64_t)new (nothrow) uint8_t[requests[i].size]; + + // The debugger will handle the case when address is nullptr (we have to break our promise) + } + } + + // TODO, FuncEval, consider an optimization to eliminate this message when they is nothing required from the + // debugger side to fill + + response.kind = DebuggerResponseKind::ConservativeReportingBufferReady; + DebugEventSource::SendCustomEvent((void*)&response, sizeof(response)); + + // ... debugger magic happen here again ... 
+ + for (uint32_t i = 0; i < g_numGcProtectionRequests; i++) + { + DebuggerGcProtectionRequest* request = requests + i; + switch(request->kind) + { + case DebuggerGcProtectionRequestKind::EnsureConservativeReporting: + EnsureConservativeReporting(request); + break; + + case DebuggerGcProtectionRequestKind::RemoveConservativeReporting: + RemoveConservativeReporting(request); + break; + + case DebuggerGcProtectionRequestKind::EnsureHandle: + EnsureHandle(request); + break; + + case DebuggerGcProtectionRequestKind::RemoveHandle: + RemoveHandle(request); + break; + + default: + assert("Debugger is providing an invalid request kind." && false); + } + } + + g_numGcProtectionRequests = 0; + } +} + +/* static */ UInt32 DebuggerHook::RecordDebuggeeInitiatedHandle(void* objectHandle) +{ + DebuggerOwnedHandleListNode* head = new (nothrow) DebuggerOwnedHandleListNode(); + if (head == nullptr) + { + return 0; + } + + head->handle = objectHandle; + head->identifier = DebuggerHook::s_debuggeeInitiatedHandleIdentifier; + head->next = s_debuggerOwnedHandles; + s_debuggerOwnedHandles = head; + + s_debuggeeInitiatedHandleIdentifier += 2; + + return head->identifier; +} + +/* static */ void DebuggerHook::EnsureConservativeReporting(DebuggerGcProtectionRequest* request) +{ + DebuggerProtectedBufferListNode* tail = DebuggerHook::s_debuggerProtectedBuffers; + s_debuggerProtectedBuffers = new (std::nothrow) DebuggerProtectedBufferListNode(); + if (s_debuggerProtectedBuffers == nullptr) + { + s_debuggerProtectedBuffers = tail; + // TODO, FuncEval, we cannot handle the debugger request to protect a buffer (we have to break our promise) + // TODO, FuncEval, we need to figure out how to communicate this broken promise to the debugger + } + else + { + s_debuggerProtectedBuffers->address = request->address; + s_debuggerProtectedBuffers->size = request->size; + s_debuggerProtectedBuffers->identifier = request->identifier; + s_debuggerProtectedBuffers->next = tail; + } +} + +/* static */ void DebuggerHook::RemoveConservativeReporting(DebuggerGcProtectionRequest* request) +{ + DebuggerProtectedBufferListNode* prev = nullptr; + DebuggerProtectedBufferListNode* curr = DebuggerHook::s_debuggerProtectedBuffers; + while (true) + { + if (curr == nullptr) + { + assert("Debugger is trying to remove a conservative reporting entry which is no longer exist." 
&& false); + break; + } + if (curr->identifier == request->identifier) + { + DebuggerProtectedBufferListNode* toDelete = curr; + if (prev == nullptr) + { + // We are trying to remove the head of the linked list + DebuggerHook::s_debuggerProtectedBuffers = curr->next; + } + else + { + prev->next = curr->next; + } + + delete toDelete; + break; + } + else + { + prev = curr; + curr = curr->next; + } + } +} + +/* static */ void DebuggerHook::EnsureHandle(DebuggerGcProtectionRequest* request) +{ + DebuggerOwnedHandleListNode* tail = DebuggerHook::s_debuggerOwnedHandles; + s_debuggerOwnedHandles = new (std::nothrow) DebuggerOwnedHandleListNode(); + if (s_debuggerOwnedHandles == nullptr) + { + s_debuggerOwnedHandles = tail; + // TODO, FuncEval, we cannot handle the debugger request to protect a buffer (we have to break our promise) + // TODO, FuncEval, we need to figure out how to communicate this broken promise to the debugger + } + else + { + int handleType = (int)request->type; + void* handle = RedhawkGCInterface::CreateTypedHandle((void*)request->address, handleType); + + DebuggerGcProtectionHandleReadyResponse response; + response.kind = DebuggerResponseKind::HandleReady; + response.payload = request->payload; + response.handle = (uint64_t)handle; + DebugEventSource::SendCustomEvent((void*)&response, sizeof(response)); + + s_debuggerOwnedHandles->handle = handle; + s_debuggerOwnedHandles->identifier = request->identifier; + s_debuggerOwnedHandles->next = tail; + } +} + +/* static */ void DebuggerHook::RemoveHandle(DebuggerGcProtectionRequest* request) +{ + DebuggerOwnedHandleListNode* prev = nullptr; + DebuggerOwnedHandleListNode* curr = DebuggerHook::s_debuggerOwnedHandles; + while (true) + { + if (curr == nullptr) + { + assert("Debugger is trying to remove a gc handle entry which is no longer exist." && false); + break; + } + if (curr->identifier == request->identifier) + { + DebuggerOwnedHandleListNode* toDelete = curr; + RedhawkGCInterface::DestroyTypedHandle(toDelete->handle); + + if (prev == nullptr) + { + // We are trying to remove the head of the linked list + DebuggerHook::s_debuggerOwnedHandles = curr->next; + } + else + { + prev->next = curr->next; + } + + delete toDelete; + break; + } + else + { + prev = curr; + curr = curr->next; + } + } +} + +EXTERN_C REDHAWK_API UInt32 __cdecl RhpRecordDebuggeeInitiatedHandle(void* objectHandle) +{ + return DebuggerHook::RecordDebuggeeInitiatedHandle(objectHandle); +} + +EXTERN_C REDHAWK_API void __cdecl RhpVerifyDebuggerCleanup() +{ + assert(DebuggerHook::s_debuggerOwnedHandles == nullptr); + assert(DebuggerHook::s_debuggerProtectedBuffers == nullptr); +} + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/DebuggerHook.h b/src/coreclr/src/nativeaot/Runtime/DebuggerHook.h new file mode 100644 index 0000000000000..86ef5066f50e3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/DebuggerHook.h @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
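The GC-protection bookkeeping in DebuggerHook.cpp above is built on two hand-rolled singly linked lists (s_debuggerProtectedBuffers and s_debuggerOwnedHandles): nodes are pushed at the head with new (nothrow), and removal walks the list with a prev/curr pair keyed by identifier. A minimal standalone sketch (not part of the patch) of that pattern; Node, Push, and RemoveById are illustrative names:

#include <cassert>
#include <cstdint>
#include <new>

struct Node
{
    uint32_t identifier;
    Node*    next;
};

static Node* g_head = nullptr;

static bool Push(uint32_t id)
{
    Node* node = new (std::nothrow) Node();
    if (node == nullptr)
        return false;                    // mirror the "broken promise" path: leave the list untouched
    node->identifier = id;
    node->next = g_head;                 // push at the head
    g_head = node;
    return true;
}

static void RemoveById(uint32_t id)
{
    Node* prev = nullptr;
    for (Node* curr = g_head; curr != nullptr; prev = curr, curr = curr->next)
    {
        if (curr->identifier != id)
            continue;
        (prev == nullptr ? g_head : prev->next) = curr->next;  // unlink head or interior node
        delete curr;
        return;
    }
    assert("identifier not found" && false);  // DebuggerHook asserts in the same situation
}

int main()
{
    Push(2); Push(4); Push(6);           // list is now 6 -> 4 -> 2
    RemoveById(4);                       // removes an interior node
    RemoveById(6);                       // removes the head
    assert(g_head != nullptr && g_head->identifier == 2);
    return 0;
}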
+// ----------------------------------------------------------------------------------------------------------- +// Support for evaluating expression in the debuggee during debugging +// ----------------------------------------------------------------------------------------------------------- + +#ifndef __DEBUGGER_HOOK_H__ +#define __DEBUGGER_HOOK_H__ + +#include "common.h" +#include "CommonTypes.h" +#ifdef DACCESS_COMPILE +#include "CommonMacros.h" +#endif +#include "daccess.h" +#include "Debug.h" + +#ifndef DACCESS_COMPILE + +struct DebuggerProtectedBufferListNode +{ + UInt64 address; + UInt16 size; + UInt32 identifier; + struct DebuggerProtectedBufferListNode* next; +}; + +struct DebuggerOwnedHandleListNode +{ + void* handle; + UInt32 identifier; + struct DebuggerOwnedHandleListNode* next; +}; + +class DebuggerHook +{ +public: + static void OnBeforeGcCollection(); + static UInt32 RecordDebuggeeInitiatedHandle(void* handle); + static DebuggerProtectedBufferListNode* s_debuggerProtectedBuffers; + static DebuggerOwnedHandleListNode* s_debuggerOwnedHandles; +private: + static void EnsureConservativeReporting(DebuggerGcProtectionRequest* request); + static void RemoveConservativeReporting(DebuggerGcProtectionRequest* request); + static void EnsureHandle(DebuggerGcProtectionRequest* request); + static void RemoveHandle(DebuggerGcProtectionRequest* request); + static UInt32 s_debuggeeInitiatedHandleIdentifier; +}; + +#endif //!DACCESS_COMPILE + +#endif // __DEBUGGER_HOOK_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/EHHelpers.cpp new file mode 100644 index 0000000000000..3df7fcecb196d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/EHHelpers.cpp @@ -0,0 +1,484 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#ifndef DACCESS_COMPILE +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "shash.h" +#include "RWLock.h" +#include "TypeManager.h" +#include "varint.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "holder.h" +#include "Crst.h" +#include "RuntimeInstance.h" +#include "event.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "stressLog.h" +#include "rhbinder.h" +#include "eetype.h" +#include "eetype.inl" + +COOP_PINVOKE_HELPER(Boolean, RhpEHEnumInitFromStackFrameIterator, ( + StackFrameIterator* pFrameIter, void ** pMethodStartAddressOut, EHEnum* pEHEnum)) +{ + ICodeManager * pCodeManager = pFrameIter->GetCodeManager(); + pEHEnum->m_pCodeManager = pCodeManager; + + return pCodeManager->EHEnumInit(pFrameIter->GetMethodInfo(), pMethodStartAddressOut, &pEHEnum->m_state); +} + +COOP_PINVOKE_HELPER(Boolean, RhpEHEnumNext, (EHEnum* pEHEnum, EHClause* pEHClause)) +{ + return pEHEnum->m_pCodeManager->EHEnumNext(&pEHEnum->m_state, pEHClause); +} + +// Unmanaged helper to locate one of two classlib-provided functions that the runtime needs to +// implement throwing of exceptions out of Rtm, and fail-fast. This may return NULL if the classlib +// found via the provided address does not have the necessary exports. 
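RhpEHEnumInitFromStackFrameIterator and RhpEHEnumNext above expose the code manager's EH clauses to the managed exception dispatcher through a classic init/next enumeration. A minimal standalone sketch (not part of the patch) of that idiom; EHEnumSketch, EHClauseSketch, and the clause offsets are invented for illustration and do not reflect any real method's EH table:

#include <cstdint>
#include <cstdio>
#include <vector>

struct EHClauseSketch
{
    uint32_t tryStartOffset;
    uint32_t tryEndOffset;
};

struct EHEnumSketch
{
    std::vector<EHClauseSketch> clauses; // stands in for the code manager's EH info
    size_t next = 0;
};

static bool EHEnumInitSketch(EHEnumSketch* pEnum)
{
    pEnum->clauses = { { 0x10, 0x40 }, { 0x50, 0x80 } };
    pEnum->next = 0;
    return !pEnum->clauses.empty();      // true means there is at least one clause to enumerate
}

static bool EHEnumNextSketch(EHEnumSketch* pEnum, EHClauseSketch* pClause)
{
    if (pEnum->next >= pEnum->clauses.size())
        return false;                    // enumeration finished
    *pClause = pEnum->clauses[pEnum->next++];
    return true;
}

int main()
{
    EHEnumSketch ehEnum;
    EHClauseSketch clause;
    if (EHEnumInitSketch(&ehEnum))
    {
        while (EHEnumNextSketch(&ehEnum, &clause))   // same shape as the managed dispatch loop
            std::printf("try region [0x%x, 0x%x)\n",
                        (unsigned)clause.tryStartOffset, (unsigned)clause.tryEndOffset);
    }
    return 0;
}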
+COOP_PINVOKE_HELPER(void *, RhpGetClasslibFunctionFromCodeAddress, (void * address, ClasslibFunctionId functionId)) +{ + return GetRuntimeInstance()->GetClasslibFunctionFromCodeAddress(address, functionId); +} + +// Unmanaged helper to locate one of two classlib-provided functions that the runtime needs to +// implement throwing of exceptions out of Rtm, and fail-fast. This may return NULL if the classlib +// found via the provided address does not have the necessary exports. +COOP_PINVOKE_HELPER(void *, RhpGetClasslibFunctionFromEEType, (EEType * pEEType, ClasslibFunctionId functionId)) +{ + return pEEType->GetTypeManagerPtr()->AsTypeManager()->GetClasslibFunction(functionId); +} + +COOP_PINVOKE_HELPER(void, RhpValidateExInfoStack, ()) +{ + Thread * pThisThread = ThreadStore::GetCurrentThread(); + pThisThread->ValidateExInfoStack(); +} + +COOP_PINVOKE_HELPER(void, RhpClearThreadDoNotTriggerGC, ()) +{ + Thread * pThisThread = ThreadStore::GetCurrentThread(); + + if (!pThisThread->IsDoNotTriggerGcSet()) + RhFailFast(); + + pThisThread->ClearDoNotTriggerGc(); +} + +COOP_PINVOKE_HELPER(void, RhpSetThreadDoNotTriggerGC, ()) +{ + Thread * pThisThread = ThreadStore::GetCurrentThread(); + + if (pThisThread->IsDoNotTriggerGcSet()) + RhFailFast(); + + pThisThread->SetDoNotTriggerGc(); +} + +COOP_PINVOKE_HELPER(Int32, RhGetModuleFileName, (HANDLE moduleHandle, _Out_ const TCHAR** pModuleNameOut)) +{ + return PalGetModuleFileName(pModuleNameOut, moduleHandle); +} + +COOP_PINVOKE_HELPER(void, RhpCopyContextFromExInfo, (void * pOSContext, Int32 cbOSContext, PAL_LIMITED_CONTEXT * pPalContext)) +{ + UNREFERENCED_PARAMETER(cbOSContext); + ASSERT(cbOSContext >= sizeof(CONTEXT)); + CONTEXT* pContext = (CONTEXT *)pOSContext; +#if defined(UNIX_AMD64_ABI) + pContext->Rip = pPalContext->IP; + pContext->Rsp = pPalContext->Rsp; + pContext->Rbp = pPalContext->Rbp; + pContext->Rdx = pPalContext->Rdx; + pContext->Rax = pPalContext->Rax; + pContext->Rbx = pPalContext->Rbx; + pContext->R12 = pPalContext->R12; + pContext->R13 = pPalContext->R13; + pContext->R14 = pPalContext->R14; + pContext->R15 = pPalContext->R15; +#elif defined(HOST_AMD64) + pContext->Rip = pPalContext->IP; + pContext->Rsp = pPalContext->Rsp; + pContext->Rbp = pPalContext->Rbp; + pContext->Rdi = pPalContext->Rdi; + pContext->Rsi = pPalContext->Rsi; + pContext->Rax = pPalContext->Rax; + pContext->Rbx = pPalContext->Rbx; + pContext->R12 = pPalContext->R12; + pContext->R13 = pPalContext->R13; + pContext->R14 = pPalContext->R14; + pContext->R15 = pPalContext->R15; +#elif defined(HOST_X86) + pContext->Eip = pPalContext->IP; + pContext->Esp = pPalContext->Rsp; + pContext->Ebp = pPalContext->Rbp; + pContext->Edi = pPalContext->Rdi; + pContext->Esi = pPalContext->Rsi; + pContext->Eax = pPalContext->Rax; + pContext->Ebx = pPalContext->Rbx; +#elif defined(HOST_ARM) + pContext->R0 = pPalContext->R0; + pContext->R4 = pPalContext->R4; + pContext->R5 = pPalContext->R5; + pContext->R6 = pPalContext->R6; + pContext->R7 = pPalContext->R7; + pContext->R8 = pPalContext->R8; + pContext->R9 = pPalContext->R9; + pContext->R10 = pPalContext->R10; + pContext->R11 = pPalContext->R11; + pContext->Sp = pPalContext->SP; + pContext->Lr = pPalContext->LR; + pContext->Pc = pPalContext->IP; +#elif defined(HOST_ARM64) + pContext->X0 = pPalContext->X0; + pContext->X1 = pPalContext->X1; + // TODO: Copy registers X2-X7 when we start supporting HVA's + pContext->X19 = pPalContext->X19; + pContext->X20 = pPalContext->X20; + pContext->X21 = pPalContext->X21; + pContext->X22 = 
pPalContext->X22; + pContext->X23 = pPalContext->X23; + pContext->X24 = pPalContext->X24; + pContext->X25 = pPalContext->X25; + pContext->X26 = pPalContext->X26; + pContext->X27 = pPalContext->X27; + pContext->X28 = pPalContext->X28; + pContext->Fp = pPalContext->FP; + pContext->Sp = pPalContext->SP; + pContext->Lr = pPalContext->LR; + pContext->Pc = pPalContext->IP; +#elif defined(HOST_WASM) + // No registers, no work to do yet +#else +#error Not Implemented for this architecture -- RhpCopyContextFromExInfo +#endif +} + +#if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) +struct DISPATCHER_CONTEXT +{ + UIntNative ControlPc; + // N.B. There is more here (so this struct isn't the right size), but we ignore everything else +}; + +#ifdef HOST_X86 +struct EXCEPTION_REGISTRATION_RECORD +{ + UIntNative Next; + UIntNative Handler; +}; +#endif // HOST_X86 + +EXTERN_C void __cdecl RhpFailFastForPInvokeExceptionPreemp(IntNative PInvokeCallsiteReturnAddr, + void* pExceptionRecord, void* pContextRecord); +EXTERN_C void REDHAWK_CALLCONV RhpFailFastForPInvokeExceptionCoop(IntNative PInvokeCallsiteReturnAddr, + void* pExceptionRecord, void* pContextRecord); +Int32 __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs); + +EXTERN_C Int32 __stdcall RhpPInvokeExceptionGuard(PEXCEPTION_RECORD pExceptionRecord, + UIntNative EstablisherFrame, + PCONTEXT pContextRecord, + DISPATCHER_CONTEXT * pDispatcherContext) +{ + UNREFERENCED_PARAMETER(EstablisherFrame); +#ifdef APP_LOCAL_RUNTIME + UNREFERENCED_PARAMETER(pDispatcherContext); + // + // When running on Windows 8.1 RTM, we cannot register our vectored exception handler, because that + // version of MRT100.dll does not support it. However, the binder sets this function as the personality + // routine for every reverse p/invoke, so we can handle hardware exceptions from managed code here. + // + EXCEPTION_POINTERS pointers; + pointers.ExceptionRecord = pExceptionRecord; + pointers.ContextRecord = pContextRecord; + + if (RhpVectoredExceptionHandler(&pointers) == EXCEPTION_CONTINUE_EXECUTION) + return ExceptionContinueExecution; +#endif //APP_LOCAL_RUNTIME + + Thread * pThread = ThreadStore::GetCurrentThread(); + + // If the thread is currently in the "do not trigger GC" mode, we must not allocate, we must not reverse pinvoke, or + // return from a pinvoke. All of these things will deadlock with the GC and they all become increasingly likely as + // exception dispatch kicks off. So we just nip this in the bud as early as possible with a FailFast. The most + // likely case where this occurs is in our GC-callouts for Jupiter lifetime management -- in that case, we have + // managed code that calls to native code (without pinvoking) which might have a bug that causes an AV. + if (pThread->IsDoNotTriggerGcSet()) + RhFailFast(); + + // We promote exceptions that were not converted to managed exceptions to a FailFast. However, we have to + // be careful because we got here via OS SEH infrastructure and, therefore, don't know what GC mode we're + // currently in. As a result, since we're calling back into managed code to handle the FailFast, we must + // correctly call either a UnmanagedCallersOnly or a RuntimeExport version of the same method. + if (pThread->IsCurrentThreadInCooperativeMode()) + { + // Cooperative mode -- Typically, RhpVectoredExceptionHandler will handle this because the faulting IP will be + // in managed code. But sometimes we AV on a bad call indirect or something similar. 
In that situation, we can + // use the dispatcher context or exception registration record to find the relevant classlib. +#ifdef HOST_X86 + IntNative classlibBreadcrumb = ((EXCEPTION_REGISTRATION_RECORD*)EstablisherFrame)->Handler; +#else + IntNative classlibBreadcrumb = pDispatcherContext->ControlPc; +#endif + RhpFailFastForPInvokeExceptionCoop(classlibBreadcrumb, pExceptionRecord, pContextRecord); + } + else + { + // Preemptive mode -- the classlib associated with the last pinvoke owns the fail fast behavior. + IntNative pinvokeCallsiteReturnAddr = (IntNative)pThread->GetCurrentThreadPInvokeReturnAddress(); + RhpFailFastForPInvokeExceptionPreemp(pinvokeCallsiteReturnAddr, pExceptionRecord, pContextRecord); + } + + return 0; +} +#else +EXTERN_C Int32 RhpPInvokeExceptionGuard() +{ + ASSERT_UNCONDITIONALLY("RhpPInvokeExceptionGuard NYI for this architecture!"); + RhFailFast(); + return 0; +} +#endif + +#if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) || defined(HOST_WASM) +EXTERN_C REDHAWK_API void __fastcall RhpThrowHwEx(); +#else +COOP_PINVOKE_HELPER(void, RhpThrowHwEx, ()) +{ + ASSERT_UNCONDITIONALLY("RhpThrowHwEx NYI for this architecture!"); +} +COOP_PINVOKE_HELPER(void, RhpThrowEx, ()) +{ + ASSERT_UNCONDITIONALLY("RhpThrowEx NYI for this architecture!"); +} +COOP_PINVOKE_HELPER(void, RhpCallCatchFunclet, ()) +{ + ASSERT_UNCONDITIONALLY("RhpCallCatchFunclet NYI for this architecture!"); +} +COOP_PINVOKE_HELPER(void, RhpCallFinallyFunclet, ()) +{ + ASSERT_UNCONDITIONALLY("RhpCallFinallyFunclet NYI for this architecture!"); +} +COOP_PINVOKE_HELPER(void, RhpCallFilterFunclet, ()) +{ + ASSERT_UNCONDITIONALLY("RhpCallFilterFunclet NYI for this architecture!"); +} +COOP_PINVOKE_HELPER(void, RhpRethrow, ()) +{ + ASSERT_UNCONDITIONALLY("RhpRethrow NYI for this architecture!"); +} + +EXTERN_C void* RhpCallCatchFunclet2 = NULL; +EXTERN_C void* RhpCallFinallyFunclet2 = NULL; +EXTERN_C void* RhpCallFilterFunclet2 = NULL; +EXTERN_C void* RhpThrowEx2 = NULL; +EXTERN_C void* RhpThrowHwEx2 = NULL; +EXTERN_C void* RhpRethrow2 = NULL; +#endif + +EXTERN_C void * RhpAssignRefAVLocation; +EXTERN_C void * RhpCheckedAssignRefAVLocation; +EXTERN_C void * RhpCheckedLockCmpXchgAVLocation; +EXTERN_C void * RhpCheckedXchgAVLocation; +EXTERN_C void * RhpLockCmpXchg32AVLocation; +EXTERN_C void * RhpLockCmpXchg64AVLocation; +EXTERN_C void * RhpCopyMultibyteDestAVLocation; +EXTERN_C void * RhpCopyMultibyteSrcAVLocation; +EXTERN_C void * RhpCopyMultibyteNoGCRefsDestAVLocation; +EXTERN_C void * RhpCopyMultibyteNoGCRefsSrcAVLocation; +EXTERN_C void * RhpCopyMultibyteWithWriteBarrierDestAVLocation; +EXTERN_C void * RhpCopyMultibyteWithWriteBarrierSrcAVLocation; +EXTERN_C void * RhpCopyAnyWithWriteBarrierDestAVLocation; +EXTERN_C void * RhpCopyAnyWithWriteBarrierSrcAVLocation; + +static bool InWriteBarrierHelper(UIntNative faultingIP) +{ +#ifndef USE_PORTABLE_HELPERS + static UIntNative writeBarrierAVLocations[] = + { + (UIntNative)&RhpAssignRefAVLocation, + (UIntNative)&RhpCheckedAssignRefAVLocation, + (UIntNative)&RhpCheckedLockCmpXchgAVLocation, + (UIntNative)&RhpCheckedXchgAVLocation, + (UIntNative)&RhpLockCmpXchg32AVLocation, + (UIntNative)&RhpLockCmpXchg64AVLocation, + }; + + // compare the IP against the list of known possible AV locations in the write barrier helpers + for (size_t i = 0; i < sizeof(writeBarrierAVLocations)/sizeof(writeBarrierAVLocations[0]); i++) + { +#if defined(HOST_AMD64) || defined(HOST_X86) + // Verify that the runtime is not linked with incremental 
linking enabled. Incremental linking + // wraps every method symbol with a jump stub that breaks the following check. + ASSERT(*(UInt8*)writeBarrierAVLocations[i] != 0xE9); // jmp XXXXXXXX +#endif + + if (writeBarrierAVLocations[i] == faultingIP) + return true; + } +#endif // USE_PORTABLE_HELPERS + + return false; +} + +static UIntNative UnwindWriteBarrierToCaller( +#ifdef TARGET_UNIX + PAL_LIMITED_CONTEXT * pContext +#else + _CONTEXT * pContext +#endif + ) +{ +#if defined(_DEBUG) + UIntNative faultingIP = pContext->GetIp(); + ASSERT(InWriteBarrierHelper(faultingIP)); +#endif +#if defined(HOST_AMD64) || defined(HOST_X86) + // simulate a ret instruction + UIntNative sp = pContext->GetSp(); + UIntNative adjustedFaultingIP = *(UIntNative *)sp; + pContext->SetSp(sp+sizeof(UIntNative)); // pop the stack +#elif defined(HOST_ARM) || defined(HOST_ARM64) + UIntNative adjustedFaultingIP = pContext->GetLr(); +#else + UIntNative adjustedFaultingIP = 0; // initializing to make the compiler happy + PORTABILITY_ASSERT("UnwindWriteBarrierToCaller"); +#endif + return adjustedFaultingIP; +} + +#ifdef TARGET_UNIX + +Int32 __stdcall RhpHardwareExceptionHandler(UIntNative faultCode, UIntNative faultAddress, + PAL_LIMITED_CONTEXT* palContext, UIntNative* arg0Reg, UIntNative* arg1Reg) +{ + UIntNative faultingIP = palContext->GetIp(); + + ICodeManager * pCodeManager = GetRuntimeInstance()->FindCodeManagerByAddress((PTR_VOID)faultingIP); + if ((pCodeManager != NULL) || (faultCode == STATUS_ACCESS_VIOLATION && InWriteBarrierHelper(faultingIP))) + { + // Make sure that the OS does not use our internal fault codes + ASSERT(faultCode != STATUS_REDHAWK_NULL_REFERENCE && faultCode != STATUS_REDHAWK_WRITE_BARRIER_NULL_REFERENCE); + + if (faultCode == STATUS_ACCESS_VIOLATION) + { + if (faultAddress < NULL_AREA_SIZE) + { + faultCode = pCodeManager ? STATUS_REDHAWK_NULL_REFERENCE : STATUS_REDHAWK_WRITE_BARRIER_NULL_REFERENCE; + } + + if (pCodeManager == NULL) + { + // we were AV-ing in a write barrier helper - unwind our way to our caller + faultingIP = UnwindWriteBarrierToCaller(palContext); + } + } + else if (faultCode == STATUS_STACK_OVERFLOW) + { + // Do not use ASSERT_UNCONDITIONALLY here. It will crash because of it consumes too much stack. + + PalPrintFatalError("\nProcess is terminating due to StackOverflowException.\n"); + RhFailFast(); + } + + *arg0Reg = faultCode; + *arg1Reg = faultingIP; + palContext->SetIp((UIntNative)&RhpThrowHwEx); + + return EXCEPTION_CONTINUE_EXECUTION; + } + + return EXCEPTION_CONTINUE_SEARCH; +} + +#else // TARGET_UNIX + +Int32 __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs) +{ + UIntNative faultingIP = pExPtrs->ContextRecord->GetIp(); + + ICodeManager * pCodeManager = GetRuntimeInstance()->FindCodeManagerByAddress((PTR_VOID)faultingIP); + UIntNative faultCode = pExPtrs->ExceptionRecord->ExceptionCode; + if ((pCodeManager != NULL) || (faultCode == STATUS_ACCESS_VIOLATION && InWriteBarrierHelper(faultingIP))) + { + // Make sure that the OS does not use our internal fault codes + ASSERT(faultCode != STATUS_REDHAWK_NULL_REFERENCE && faultCode != STATUS_REDHAWK_WRITE_BARRIER_NULL_REFERENCE); + + if (faultCode == STATUS_ACCESS_VIOLATION) + { + if (pExPtrs->ExceptionRecord->ExceptionInformation[1] < NULL_AREA_SIZE) + { + faultCode = pCodeManager ? 
STATUS_REDHAWK_NULL_REFERENCE : STATUS_REDHAWK_WRITE_BARRIER_NULL_REFERENCE; + } + + if (pCodeManager == NULL) + { + // we were AV-ing in a write barrier helper - unwind our way to our caller + faultingIP = UnwindWriteBarrierToCaller(pExPtrs->ContextRecord); + } + } + else if (faultCode == STATUS_STACK_OVERFLOW) + { + // Do not use ASSERT_UNCONDITIONALLY here. It will crash because of it consumes too much stack. + + PalPrintFatalError("\nProcess is terminating due to StackOverflowException.\n"); + PalRaiseFailFastException(pExPtrs->ExceptionRecord, pExPtrs->ContextRecord, 0); + } + + pExPtrs->ContextRecord->SetIp((UIntNative)&RhpThrowHwEx); + pExPtrs->ContextRecord->SetArg0Reg(faultCode); + pExPtrs->ContextRecord->SetArg1Reg(faultingIP); + + return EXCEPTION_CONTINUE_EXECUTION; + } + + { + static UInt8 *s_pbRuntimeModuleLower = NULL; + static UInt8 *s_pbRuntimeModuleUpper = NULL; + + // If this is the first time through this path then calculate the upper and lower bounds of the + // runtime module. Note we could be racing to calculate this but it doesn't matter since the results + // should always agree. + if ((s_pbRuntimeModuleLower == NULL) || (s_pbRuntimeModuleUpper == NULL)) + { + // Get the module handle for this runtime. Do this by passing an address definitely within the + // module (the address of this function) to GetModuleHandleEx with the "from address" flag. + HANDLE hRuntimeModule = PalGetModuleHandleFromPointer(reinterpret_cast(RhpVectoredExceptionHandler)); + if (!hRuntimeModule) + { + ASSERT_UNCONDITIONALLY("Failed to locate our own module handle"); + RhFailFast(); + } + + PalGetModuleBounds(hRuntimeModule, &s_pbRuntimeModuleLower, &s_pbRuntimeModuleUpper); + } + + if (((UInt8*)faultingIP >= s_pbRuntimeModuleLower) && ((UInt8*)faultingIP < s_pbRuntimeModuleUpper)) + { + // Generally any form of hardware exception within the runtime itself is considered a fatal error. + // Note this includes the managed code within the runtime. + ASSERT_UNCONDITIONALLY("Hardware exception raised inside the runtime."); + PalRaiseFailFastException(pExPtrs->ExceptionRecord, pExPtrs->ContextRecord, 0); + } + } + + return EXCEPTION_CONTINUE_SEARCH; +} + +#endif // TARGET_UNIX + +COOP_PINVOKE_HELPER(void, RhpFallbackFailFast, ()) +{ + RhFailFast(); +} + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/EtwEvents.h b/src/coreclr/src/nativeaot/Runtime/EtwEvents.h new file mode 100644 index 0000000000000..a023af9d464ea --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/EtwEvents.h @@ -0,0 +1,904 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// **** This file is auto-generated. Do not edit by hand. **** +// +// Instead ensure this file and EtwEvents.man are checked-out from source code control, locate the PUCLR ETW +// manifest file (it should be in puclr\ndp\clr\src\VM\ClrEtwAll.man), copy it into the rh\src\rtetw +// directory and run the following command from an rhenv window: +// perl EtwImportClrEvents.pl +// +// This script consults EtwEventFilter.txt to determine which events to extract from the CLR manifest. It then +// merges any additional Redhawk-specific events from EtwRedhawkEvents.xml. 
The result is an updated version +// of this header file plus EtwEvents.man, a new ETW manifest file describing the final Redhawk events which +// can be registered with the system via the following command: +// wevtutil im EtwEvents.man +// + +#ifndef __RH_ETW_DEFS_INCLUDED +#define __RH_ETW_DEFS_INCLUDED + +#if defined(FEATURE_ETW) && !defined(DACCESS_COMPILE) + +#ifndef RH_ETW_INLINE +#define RH_ETW_INLINE __declspec(noinline) __inline +#endif + +struct RH_ETW_CONTEXT +{ + TRACEHANDLE RegistrationHandle; + TRACEHANDLE Logger; + UInt64 MatchAnyKeyword; + UInt64 MatchAllKeyword; + EVENT_FILTER_DESCRIPTOR * FilterData; + UInt32 Flags; + UInt32 IsEnabled; + UInt8 Level; + UInt8 Reserve; +}; + +UInt32 EtwCallback(UInt32 IsEnabled, RH_ETW_CONTEXT * CallbackContext); + +__declspec(noinline) __inline void __stdcall +RhEtwControlCallback(GUID * /*SourceId*/, UInt32 IsEnabled, UInt8 Level, UInt64 MatchAnyKeyword, UInt64 MatchAllKeyword, EVENT_FILTER_DESCRIPTOR * FilterData, void * CallbackContext) +{ + RH_ETW_CONTEXT * Ctx = (RH_ETW_CONTEXT*)CallbackContext; + if (Ctx == NULL) + return; + Ctx->Level = Level; + Ctx->MatchAnyKeyword = MatchAnyKeyword; + Ctx->MatchAllKeyword = MatchAllKeyword; + Ctx->FilterData = FilterData; + Ctx->IsEnabled = IsEnabled; + EtwCallback(IsEnabled, (RH_ETW_CONTEXT*)CallbackContext); +} + +__declspec(noinline) __inline bool __stdcall + RhEventTracingEnabled(RH_ETW_CONTEXT * EnableInfo, + const EVENT_DESCRIPTOR * EventDescriptor) +{ + if (!EnableInfo) + return false; + if ((EventDescriptor->Level <= EnableInfo->Level) || (EnableInfo->Level == 0)) + { + if ((EventDescriptor->Keyword == (ULONGLONG)0) || + ((EventDescriptor->Keyword & EnableInfo->MatchAnyKeyword) && + ((EventDescriptor->Keyword & EnableInfo->MatchAllKeyword) == EnableInfo->MatchAllKeyword))) + return true; + } + return false; +} + +#define ETW_EVENT_ENABLED(Context, EventDescriptor) (Context.IsEnabled && RhEventTracingEnabled(&Context, &EventDescriptor)) + +extern "C" __declspec(selectany) const GUID MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER = {0x1095638c, 0x8748, 0x4c7a, {0xb3, 0x9e, 0xba, 0xea, 0x27, 0xb9, 0xc5, 0x89}}; + +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC1stConEnd = {0xd, 0x0, 0x10, 0x4, 0x1b, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC1stNonConEnd = {0xc, 0x0, 0x10, 0x4, 0x1a, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC2ndConBegin = {0x10, 0x0, 0x10, 0x4, 0x1e, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC2ndConEnd = {0x11, 0x0, 0x10, 0x4, 0x1f, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC2ndNonConBegin = {0xe, 0x0, 0x10, 0x4, 0x1c, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGC2ndNonConEnd = {0xf, 0x0, 0x10, 0x4, 0x1d, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCAllocWaitBegin = {0x17, 0x0, 0x10, 0x4, 0x25, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCAllocWaitEnd = {0x18, 0x0, 0x10, 0x4, 0x26, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCBegin = {0xb, 0x0, 0x10, 0x4, 0x19, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCDrainMark = {0x14, 0x0, 0x10, 0x4, 0x22, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCOverflow = {0x16, 0x0, 0x10, 0x4, 0x24, 0x1, 
0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCPlanEnd = {0x12, 0x0, 0x10, 0x4, 0x20, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCRevisit = {0x15, 0x0, 0x10, 0x4, 0x23, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BGCSweepEnd = {0x13, 0x0, 0x10, 0x4, 0x21, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCFullNotify_V1 = {0x19, 0x1, 0x10, 0x4, 0x13, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCGlobalHeapHistory_V1 = {0x5, 0x1, 0x10, 0x4, 0x12, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCJoin_V1 = {0x6, 0x1, 0x10, 0x5, 0x14, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCOptimized_V1 = {0x3, 0x1, 0x10, 0x5, 0x10, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCPerHeapHistory = {0x4, 0x2, 0x10, 0x4, 0x11, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCSettings = {0x2, 0x0, 0x10, 0x4, 0xe, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PinPlugAtGCTime = {0xc7, 0x0, 0x10, 0x5, 0x2c, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvDestroyGCHandle = {0xc3, 0x0, 0x10, 0x5, 0x2b, 0x1, 0x8000000000004000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvGCMarkCards_V1 = {0xa, 0x1, 0x10, 0x4, 0x18, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvGCMarkFinalizeQueueRoots_V1 = {0x8, 0x1, 0x10, 0x4, 0x16, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvGCMarkHandles_V1 = {0x9, 0x1, 0x10, 0x4, 0x17, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvGCMarkStackRoots_V1 = {0x7, 0x1, 0x10, 0x4, 0x15, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR PrvSetGCHandle = {0xc2, 0x0, 0x10, 0x5, 0x2a, 0x1, 0x8000000000004000}; + +extern "C" __declspec(selectany) REGHANDLE Microsoft_Windows_Redhawk_GC_PrivateHandle; +extern "C" __declspec(selectany) RH_ETW_CONTEXT MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context; + +#define RH_ETW_REGISTER_Microsoft_Windows_Redhawk_GC_Private() do { PalEventRegister(&MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER, RhEtwControlCallback, &MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context, &Microsoft_Windows_Redhawk_GC_PrivateHandle); } while (false) +#define RH_ETW_UNREGISTER_Microsoft_Windows_Redhawk_GC_Private() do { PalEventUnregister(Microsoft_Windows_Redhawk_GC_PrivateHandle); } while (false) + +#define FireEtwBGC1stConEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC1stConEnd)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC1stConEnd, ClrInstanceID) : 0 + +#define FireEtwBGC1stNonConEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC1stNonConEnd)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC1stNonConEnd, ClrInstanceID) : 0 + +#define FireEtwBGC2ndConBegin(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndConBegin)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndConBegin, ClrInstanceID) : 0 + +#define FireEtwBGC2ndConEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndConEnd)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndConEnd, ClrInstanceID) : 0 + +#define FireEtwBGC2ndNonConBegin(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndNonConBegin)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndNonConBegin, ClrInstanceID) : 0 + +#define FireEtwBGC2ndNonConEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndNonConEnd)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGC2ndNonConEnd, ClrInstanceID) : 0 + +#define FireEtwBGCAllocWaitBegin(Reason, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCAllocWaitBegin)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCAllocWait(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCAllocWaitBegin, Reason, ClrInstanceID) : 0 + +#define FireEtwBGCAllocWaitEnd(Reason, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCAllocWaitEnd)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCAllocWait(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCAllocWaitEnd, Reason, ClrInstanceID) : 0 + +#define FireEtwBGCBegin(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCBegin)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCBegin, ClrInstanceID) : 0 + +#define FireEtwBGCDrainMark(Objects, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCDrainMark)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCDrainMark(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCDrainMark, Objects, ClrInstanceID) : 0 + +#define FireEtwBGCOverflow(Min, Max, Objects, IsLarge, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCOverflow)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCOverflow(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCOverflow, Min, Max, Objects, IsLarge, ClrInstanceID) : 0 + +#define FireEtwBGCPlanEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCPlanEnd)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCPlanEnd, ClrInstanceID) : 0 + +#define FireEtwBGCRevisit(Pages, Objects, IsLarge, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCRevisit)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCRevisit(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCRevisit, Pages, Objects, IsLarge, ClrInstanceID) : 0 + +#define FireEtwBGCSweepEnd(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCSweepEnd)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PrivateHandle, &BGCSweepEnd, ClrInstanceID) : 0 + +#define FireEtwGCFullNotify_V1(GenNumber, IsAlloc, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCFullNotify_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCFullNotify_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCFullNotify_V1, GenNumber, IsAlloc, ClrInstanceID) : 0 + +#define FireEtwGCGlobalHeapHistory_V1(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCGlobalHeapHistory_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCGlobalHeap_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCGlobalHeapHistory_V1, FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID) : 0 + +#define FireEtwGCJoin_V1(Heap, JoinTime, JoinType, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCJoin_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCJoin_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCJoin_V1, Heap, JoinTime, JoinType, ClrInstanceID) : 0 + +#define FireEtwGCOptimized_V1(DesiredAllocation, NewAllocation, GenerationNumber, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCOptimized_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCOptimized_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCOptimized_V1, DesiredAllocation, NewAllocation, GenerationNumber, ClrInstanceID) : 0 + +#define FireEtwGCPerHeapHistory() (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCPerHeapHistory)) ? TemplateEventDescriptor(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCPerHeapHistory) : 0 + +#define FireEtwGCSettings(SegmentSize, LargeObjectSegmentSize, ServerGC) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCSettings)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCSettings(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCSettings, SegmentSize, LargeObjectSegmentSize, ServerGC) : 0 + +#define FireEtwPinPlugAtGCTime(PlugStart, PlugEnd, GapBeforeSize, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PinPlugAtGCTime)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PinPlugAtGCTime(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PinPlugAtGCTime, PlugStart, PlugEnd, GapBeforeSize, ClrInstanceID) : 0 + +#define FireEtwPrvDestroyGCHandle(HandleID, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvDestroyGCHandle)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvDestroyGCHandle(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvDestroyGCHandle, HandleID, ClrInstanceID) : 0 + +#define FireEtwPrvGCMarkCards_V1(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkCards_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvGCMark_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkCards_V1, HeapNum, ClrInstanceID) : 0 + +#define FireEtwPrvGCMarkFinalizeQueueRoots_V1(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkFinalizeQueueRoots_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvGCMark_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkFinalizeQueueRoots_V1, HeapNum, ClrInstanceID) : 0 + +#define FireEtwPrvGCMarkHandles_V1(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkHandles_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvGCMark_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkHandles_V1, HeapNum, ClrInstanceID) : 0 + +#define FireEtwPrvGCMarkStackRoots_V1(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkStackRoots_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvGCMark_V1(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvGCMarkStackRoots_V1, HeapNum, ClrInstanceID) : 0 + +#define FireEtwPrvSetGCHandle(HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvSetGCHandle)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvSetGCHandle(Microsoft_Windows_Redhawk_GC_PrivateHandle, &PrvSetGCHandle, HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) : 0 + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCAllocWait(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Reason, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCDrainMark(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 Objects, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &Objects, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCOverflow(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 Min, UInt64 Max, UInt64 Objects, UInt32 IsLarge, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[5]; + EventDataDescCreate(&EventData[0], &Min, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &Max, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &Objects, sizeof(UInt64)); + EventDataDescCreate(&EventData[3], &IsLarge, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 5, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_BGCRevisit(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 Pages, UInt64 Objects, UInt32 IsLarge, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &Pages, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &Objects, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &IsLarge, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCFullNotify_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 GenNumber, UInt32 IsAlloc, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[3]; + EventDataDescCreate(&EventData[0], &GenNumber, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &IsAlloc, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCGlobalHeap_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 FinalYoungestDesired, Int32 NumHeaps, UInt32 CondemnedGeneration, UInt32 Gen0ReductionCount, UInt32 Reason, UInt32 GlobalMechanisms, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[7]; + EventDataDescCreate(&EventData[0], &FinalYoungestDesired, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &NumHeaps, sizeof(Int32)); + EventDataDescCreate(&EventData[2], &CondemnedGeneration, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Gen0ReductionCount, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[5], 
&GlobalMechanisms, sizeof(UInt32)); + EventDataDescCreate(&EventData[6], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 7, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCJoin_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Heap, UInt32 JoinTime, UInt32 JoinType, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &Heap, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &JoinTime, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &JoinType, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCNoUserData(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[1]; + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 1, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCOptimized_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 DesiredAllocation, UInt64 NewAllocation, UInt32 GenerationNumber, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &DesiredAllocation, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &NewAllocation, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &GenerationNumber, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_GCSettings(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 SegmentSize, UInt64 LargeObjectSegmentSize, UInt32_BOOL ServerGC) +{ + EVENT_DATA_DESCRIPTOR EventData[3]; + EventDataDescCreate(&EventData[0], &SegmentSize, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &LargeObjectSegmentSize, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &ServerGC, sizeof(UInt32_BOOL)); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PinPlugAtGCTime(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, void* PlugStart, void* PlugEnd, void* GapBeforeSize, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &PlugStart, sizeof(void*)); + EventDataDescCreate(&EventData[1], &PlugEnd, sizeof(void*)); + EventDataDescCreate(&EventData[2], &GapBeforeSize, sizeof(void*)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvDestroyGCHandle(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, void* HandleID, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &HandleID, sizeof(void*)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvGCMark_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 HeapNum, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + 
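// Each EventDataDescCreate call below just records a pointer and size for one payload field;
+    // the fields must appear in the same order and with the same widths as the event's manifest
+    // template (here HeapNum as a UInt32 followed by ClrInstanceID as a UInt16).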
EventDataDescCreate(&EventData[0], &HeapNum, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_PrvSetGCHandle(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, void* HandleID, void* ObjectID, UInt32 Kind, UInt32 Generation, UInt64 AppDomainID, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], &HandleID, sizeof(void*)); + EventDataDescCreate(&EventData[1], &ObjectID, sizeof(void*)); + EventDataDescCreate(&EventData[2], &Kind, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Generation, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &AppDomainID, sizeof(UInt64)); + EventDataDescCreate(&EventData[5], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 6, EventData); +} + +extern "C" __declspec(selectany) const GUID MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER = {0x47c3ba0c, 0x77f1, 0x4eb0, {0x8d, 0x4d, 0xae, 0xf4, 0x47, 0xf1, 0x6a, 0x85}}; + +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR BulkType = {0xf, 0x0, 0x10, 0x4, 0xa, 0x15, 0x8000000000080000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR DestroyGCHandle = {0x1f, 0x0, 0x10, 0x4, 0x22, 0x1, 0x8000000000000002}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR ExceptionThrown_V1 = {0x50, 0x1, 0x10, 0x2, 0x1, 0x7, 0x8000000200008000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCAllocationTick_V1 = {0xa, 0x1, 0x10, 0x5, 0xb, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCAllocationTick_V2 = {0xa, 0x2, 0x10, 0x5, 0xb, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCAllocationTick_V3 = {0xa, 0x3, 0x10, 0x5, 0xb, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkEdge = {0x13, 0x0, 0x10, 0x4, 0x17, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkMovedObjectRanges = {0x16, 0x0, 0x10, 0x4, 0x1a, 0x1, 0x8000000000400000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkNode = {0x12, 0x0, 0x10, 0x4, 0x16, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkRCW = {0x25, 0x0, 0x10, 0x4, 0x27, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkRootCCW = {0x24, 0x0, 0x10, 0x4, 0x26, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkRootConditionalWeakTableElementEdge = {0x11, 0x0, 0x10, 0x4, 0x15, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkRootEdge = {0x10, 0x0, 0x10, 0x4, 0x14, 0x1, 0x8000000000100000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCBulkSurvivingObjectRanges = {0x15, 0x0, 0x10, 0x4, 0x19, 0x1, 0x8000000000400000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCCreateConcurrentThread_V1 = {0xb, 0x1, 0x10, 0x4, 0xc, 0x1, 0x8000000000010001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCCreateSegment_V1 = {0x5, 0x1, 0x10, 0x4, 0x86, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCEnd_V1 = {0x2, 0x1, 0x10, 0x4, 0x2, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCFreeSegment_V1 = {0x6, 0x1, 0x10, 0x4, 0x87, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const 
EVENT_DESCRIPTOR GCGenerationRange = {0x17, 0x0, 0x10, 0x4, 0x1b, 0x1, 0x8000000000400000}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCGlobalHeapHistory_V2 = {0xcd, 0x2, 0x10, 0x4, 0xcd, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCHeapStats_V1 = {0x4, 0x1, 0x10, 0x4, 0x85, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCJoin_V2 = {0xcb, 0x2, 0x10, 0x5, 0xcb, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCMarkFinalizeQueueRoots = {0x1a, 0x0, 0x10, 0x4, 0x1d, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCMarkHandles = {0x1b, 0x0, 0x10, 0x4, 0x1e, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCMarkOlderGenerationRoots = {0x1c, 0x0, 0x10, 0x4, 0x1f, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCMarkStackRoots = {0x19, 0x0, 0x10, 0x4, 0x1c, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCMarkWithType = {0xca, 0x0, 0x10, 0x4, 0xca, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCPerHeapHistory_V3 = {0xcc, 0x3, 0x10, 0x4, 0xcc, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCRestartEEBegin_V1 = {0x7, 0x1, 0x10, 0x4, 0x88, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCRestartEEEnd_V1 = {0x3, 0x1, 0x10, 0x4, 0x84, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCStart_V1 = {0x1, 0x1, 0x10, 0x4, 0x1, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCStart_V2 = {0x1, 0x2, 0x10, 0x4, 0x1, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCSuspendEEBegin_V1 = {0x9, 0x1, 0x10, 0x4, 0xa, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCSuspendEEEnd_V1 = {0x8, 0x1, 0x10, 0x4, 0x89, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCTerminateConcurrentThread_V1 = {0xc, 0x1, 0x10, 0x4, 0xd, 0x1, 0x8000000000010001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR GCTriggered = {0x23, 0x0, 0x10, 0x4, 0x23, 0x1, 0x8000000000000001}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR ModuleLoad_V2 = {0x98, 0x2, 0x10, 0x4, 0x21, 0xa, 0x8000000020000008}; +extern "C" __declspec(selectany) const EVENT_DESCRIPTOR SetGCHandle = {0x1e, 0x0, 0x10, 0x4, 0x21, 0x1, 0x8000000000000002}; + +extern "C" __declspec(selectany) REGHANDLE Microsoft_Windows_Redhawk_GC_PublicHandle; +extern "C" __declspec(selectany) RH_ETW_CONTEXT MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context; + +#define RH_ETW_REGISTER_Microsoft_Windows_Redhawk_GC_Public() do { PalEventRegister(&MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER, RhEtwControlCallback, &MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context, &Microsoft_Windows_Redhawk_GC_PublicHandle); } while (false) +#define RH_ETW_UNREGISTER_Microsoft_Windows_Redhawk_GC_Public() do { PalEventUnregister(Microsoft_Windows_Redhawk_GC_PublicHandle); } while (false) + +#define FireEtwBulkType(Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &BulkType)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_BulkType(Microsoft_Windows_Redhawk_GC_PublicHandle, &BulkType, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwDestroyGCHandle(HandleID, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &DestroyGCHandle)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_DestroyGCHandle(Microsoft_Windows_Redhawk_GC_PublicHandle, &DestroyGCHandle, HandleID, ClrInstanceID) : 0 + +#define FireEtwExceptionThrown_V1(ExceptionType, ExceptionMessage, ExceptionEIP, ExceptionHRESULT, ExceptionFlags, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &ExceptionThrown_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Exception(Microsoft_Windows_Redhawk_GC_PublicHandle, &ExceptionThrown_V1, ExceptionType, ExceptionMessage, ExceptionEIP, ExceptionHRESULT, ExceptionFlags, ClrInstanceID) : 0 + +#define FireEtwGCAllocationTick_V1(AllocationAmount, AllocationKind, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V1, AllocationAmount, AllocationKind, ClrInstanceID) : 0 + +#define FireEtwGCAllocationTick_V2(AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V2)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V2(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V2, AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex) : 0 + +#define FireEtwGCAllocationTick_V3(AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex, Address) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V3)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V3(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCAllocationTick_V3, AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex, Address) : 0 + +#define FireEtwGCBulkEdge(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkEdge)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkEdge(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkEdge, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkMovedObjectRanges(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkMovedObjectRanges)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkMovedObjectRanges(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkMovedObjectRanges, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkNode(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkNode)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkNode(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkNode, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkRCW(Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRCW)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRCW(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRCW, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkRootCCW(Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootCCW)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootCCW(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootCCW, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkRootConditionalWeakTableElementEdge(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootConditionalWeakTableElementEdge)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootConditionalWeakTableElementEdge(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootConditionalWeakTableElementEdge, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkRootEdge(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootEdge)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootEdge(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkRootEdge, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCBulkSurvivingObjectRanges(Index, Count, ClrInstanceID, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkSurvivingObjectRanges)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkSurvivingObjectRanges(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCBulkSurvivingObjectRanges, Index, Count, ClrInstanceID, Values_Len_, Values) : 0 + +#define FireEtwGCCreateConcurrentThread_V1(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCCreateConcurrentThread_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCCreateConcurrentThread(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCCreateConcurrentThread_V1, ClrInstanceID) : 0 + +#define FireEtwGCCreateSegment_V1(Address, Size, Type, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCCreateSegment_V1)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCCreateSegment_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCCreateSegment_V1, Address, Size, Type, ClrInstanceID) : 0 + +#define FireEtwGCEnd_V1(Count, Depth, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCEnd_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCEnd_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCEnd_V1, Count, Depth, ClrInstanceID) : 0 + +#define FireEtwGCFreeSegment_V1(Address, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCFreeSegment_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCFreeSegment_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCFreeSegment_V1, Address, ClrInstanceID) : 0 + +#define FireEtwGCGenerationRange(Generation, RangeStart, RangeUsedLength, RangeReservedLength, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCGenerationRange)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCGenerationRange(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCGenerationRange, Generation, RangeStart, RangeUsedLength, RangeReservedLength, ClrInstanceID) : 0 + +#define FireEtwGCGlobalHeapHistory_V2(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID, PauseMode, MemoryPressure) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCGlobalHeapHistory_V2)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCGlobalHeap_V2(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCGlobalHeapHistory_V2, FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID, PauseMode, MemoryPressure) : 0 + +#define FireEtwGCHeapStats_V1(GenerationSize0, TotalPromotedSize0, GenerationSize1, TotalPromotedSize1, GenerationSize2, TotalPromotedSize2, GenerationSize3, TotalPromotedSize3, FinalizationPromotedSize, FinalizationPromotedCount, PinnedObjectCount, SinkBlockCount, GCHandleCount, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCHeapStats_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCHeapStats_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCHeapStats_V1, GenerationSize0, TotalPromotedSize0, GenerationSize1, TotalPromotedSize1, GenerationSize2, TotalPromotedSize2, GenerationSize3, TotalPromotedSize3, FinalizationPromotedSize, FinalizationPromotedCount, PinnedObjectCount, SinkBlockCount, GCHandleCount, ClrInstanceID) : 0 + +#define FireEtwGCJoin_V2(Heap, JoinTime, JoinType, ClrInstanceID, JoinID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCJoin_V2)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCJoin_V2(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCJoin_V2, Heap, JoinTime, JoinType, ClrInstanceID, JoinID) : 0 + +#define FireEtwGCMarkFinalizeQueueRoots(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkFinalizeQueueRoots)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMark(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkFinalizeQueueRoots, HeapNum, ClrInstanceID) : 0 + +#define FireEtwGCMarkHandles(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkHandles)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMark(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkHandles, HeapNum, ClrInstanceID) : 0 + +#define FireEtwGCMarkOlderGenerationRoots(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkOlderGenerationRoots)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMark(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkOlderGenerationRoots, HeapNum, ClrInstanceID) : 0 + +#define FireEtwGCMarkStackRoots(HeapNum, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkStackRoots)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMark(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkStackRoots, HeapNum, ClrInstanceID) : 0 + +#define FireEtwGCMarkWithType(HeapNum, ClrInstanceID, Type, Bytes) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkWithType)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMarkWithType(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCMarkWithType, HeapNum, ClrInstanceID, Type, Bytes) : 0 + +#define FireEtwGCPerHeapHistory_V3(ClrInstanceID, FreeListAllocated, FreeListRejected, EndOfSegAllocated, CondemnedAllocated, PinnedAllocated, PinnedAllocatedAdvance, RunningFreeListEfficiency, CondemnReasons0, CondemnReasons1, CompactMechanisms, ExpandMechanisms, HeapIndex, ExtraGen0Commit, Count, Values_Len_, Values) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCPerHeapHistory_V3)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCPerHeapHistory_V3(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCPerHeapHistory_V3, ClrInstanceID, FreeListAllocated, FreeListRejected, EndOfSegAllocated, CondemnedAllocated, PinnedAllocated, PinnedAllocatedAdvance, RunningFreeListEfficiency, CondemnReasons0, CondemnReasons1, CompactMechanisms, ExpandMechanisms, HeapIndex, ExtraGen0Commit, Count, Values_Len_, Values) : 0 + +#define FireEtwGCRestartEEBegin_V1(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCRestartEEBegin_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCRestartEEBegin_V1, ClrInstanceID) : 0 + +#define FireEtwGCRestartEEEnd_V1(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCRestartEEEnd_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCRestartEEEnd_V1, ClrInstanceID) : 0 + +#define FireEtwGCStart_V1(Count, Depth, Reason, Type, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCStart_V1)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCStart_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCStart_V1, Count, Depth, Reason, Type, ClrInstanceID) : 0 + +#define FireEtwGCStart_V2(Count, Depth, Reason, Type, ClrInstanceID, ClientSequenceNumber) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCStart_V2)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCStart_V2(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCStart_V2, Count, Depth, Reason, Type, ClrInstanceID, ClientSequenceNumber) : 0 + +#define FireEtwGCSuspendEEBegin_V1(Reason, Count, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCSuspendEEBegin_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCSuspendEE_V1(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCSuspendEEBegin_V1, Reason, Count, ClrInstanceID) : 0 + +#define FireEtwGCSuspendEEEnd_V1(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCSuspendEEEnd_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCNoUserData(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCSuspendEEEnd_V1, ClrInstanceID) : 0 + +#define FireEtwGCTerminateConcurrentThread_V1(ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCTerminateConcurrentThread_V1)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCTerminateConcurrentThread(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCTerminateConcurrentThread_V1, ClrInstanceID) : 0 + +#define FireEtwGCTriggered(Reason, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCTriggered)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCTriggered(Microsoft_Windows_Redhawk_GC_PublicHandle, &GCTriggered, Reason, ClrInstanceID) : 0 + +#define FireEtwModuleLoad_V2(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID, ManagedPdbSignature, ManagedPdbAge, ManagedPdbBuildPath, NativePdbSignature, NativePdbAge, NativePdbBuildPath) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &ModuleLoad_V2)) ? Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_ModuleLoadUnload_V2(Microsoft_Windows_Redhawk_GC_PublicHandle, &ModuleLoad_V2, ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID, ManagedPdbSignature, ManagedPdbAge, ManagedPdbBuildPath, NativePdbSignature, NativePdbAge, NativePdbBuildPath) : 0 + +#define FireEtwSetGCHandle(HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) (MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PublicHandle, &SetGCHandle)) ? 
Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_SetGCHandle(Microsoft_Windows_Redhawk_GC_PublicHandle, &SetGCHandle, HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) : 0 + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_BulkType(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[11]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[2], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_DestroyGCHandle(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, void* HandleID, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &HandleID, sizeof(void*)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Exception(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, LPCWSTR ExceptionType, LPCWSTR ExceptionMessage, void* ExceptionEIP, UInt32 ExceptionHRESULT, UInt16 ExceptionFlags, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], (ExceptionType != NULL) ? ExceptionType : L"", (ExceptionType != NULL) ? (ULONG)((wcslen(ExceptionType) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[1], (ExceptionMessage != NULL) ? ExceptionMessage : L"", (ExceptionMessage != NULL) ? (ULONG)((wcslen(ExceptionMessage) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[2], &ExceptionEIP, sizeof(void*)); + EventDataDescCreate(&EventData[3], &ExceptionHRESULT, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &ExceptionFlags, sizeof(UInt16)); + EventDataDescCreate(&EventData[5], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 6, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 AllocationAmount, UInt32 AllocationKind, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[3]; + EventDataDescCreate(&EventData[0], &AllocationAmount, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &AllocationKind, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V2(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 AllocationAmount, UInt32 AllocationKind, UInt16 ClrInstanceID, UInt64 AllocationAmount64, void* TypeID, LPCWSTR TypeName, UInt32 HeapIndex) +{ + EVENT_DATA_DESCRIPTOR EventData[7]; + EventDataDescCreate(&EventData[0], &AllocationAmount, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &AllocationKind, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], &AllocationAmount64, sizeof(UInt64)); + EventDataDescCreate(&EventData[4], &TypeID, sizeof(void*)); + EventDataDescCreate(&EventData[5], (TypeName != NULL) ? TypeName : L"", (TypeName != NULL) ? 
(ULONG)((wcslen(TypeName) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[6], &HeapIndex, sizeof(UInt32)); + return PalEventWrite(RegHandle, Descriptor, 7, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCAllocationTick_V3(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 AllocationAmount, UInt32 AllocationKind, UInt16 ClrInstanceID, UInt64 AllocationAmount64, void* TypeID, LPCWSTR TypeName, UInt32 HeapIndex, void* Address) +{ + EVENT_DATA_DESCRIPTOR EventData[8]; + EventDataDescCreate(&EventData[0], &AllocationAmount, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &AllocationKind, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], &AllocationAmount64, sizeof(UInt64)); + EventDataDescCreate(&EventData[4], &TypeID, sizeof(void*)); + EventDataDescCreate(&EventData[5], (TypeName != NULL) ? TypeName : L"", (TypeName != NULL) ? (ULONG)((wcslen(TypeName) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[6], &HeapIndex, sizeof(UInt32)); + EventDataDescCreate(&EventData[7], &Address, sizeof(void*)); + return PalEventWrite(RegHandle, Descriptor, 8, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkEdge(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkMovedObjectRanges(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[7]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkNode(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[8]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRCW(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[9]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[2], Values, Count * Values_Len_); 
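+    // The variable-length payload is passed as a single contiguous descriptor of
+    // Count * Values_Len_ bytes (Values_Len_ is presumably the per-element size), so only the
+    // first three EventData entries are handed to PalEventWrite below.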
+ return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootCCW(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[10]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[2], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootConditionalWeakTableElementEdge(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[7]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkRootEdge(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[8]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCBulkSurvivingObjectRanges(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Index, UInt32 Count, UInt16 ClrInstanceID, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], &Index, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[3], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCCreateConcurrentThread(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[1]; + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 1, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCCreateSegment_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 Address, UInt64 Size, UInt32 Type, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &Address, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &Size, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &Type, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCEnd_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 
Count, UInt32 Depth, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[3]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Depth, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCFreeSegment_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 Address, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &Address, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCGenerationRange(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt8 Generation, void* RangeStart, UInt64 RangeUsedLength, UInt64 RangeReservedLength, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[5]; + EventDataDescCreate(&EventData[0], &Generation, sizeof(UInt8)); + EventDataDescCreate(&EventData[1], &RangeStart, sizeof(void*)); + EventDataDescCreate(&EventData[2], &RangeUsedLength, sizeof(UInt64)); + EventDataDescCreate(&EventData[3], &RangeReservedLength, sizeof(UInt64)); + EventDataDescCreate(&EventData[4], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 5, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCGlobalHeap_V2(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 FinalYoungestDesired, Int32 NumHeaps, UInt32 CondemnedGeneration, UInt32 Gen0ReductionCount, UInt32 Reason, UInt32 GlobalMechanisms, UInt16 ClrInstanceID, UInt32 PauseMode, UInt32 MemoryPressure) +{ + EVENT_DATA_DESCRIPTOR EventData[9]; + EventDataDescCreate(&EventData[0], &FinalYoungestDesired, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &NumHeaps, sizeof(Int32)); + EventDataDescCreate(&EventData[2], &CondemnedGeneration, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Gen0ReductionCount, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[5], &GlobalMechanisms, sizeof(UInt32)); + EventDataDescCreate(&EventData[6], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[7], &PauseMode, sizeof(UInt32)); + EventDataDescCreate(&EventData[8], &MemoryPressure, sizeof(UInt32)); + return PalEventWrite(RegHandle, Descriptor, 9, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCHeapStats_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 GenerationSize0, UInt64 TotalPromotedSize0, UInt64 GenerationSize1, UInt64 TotalPromotedSize1, UInt64 GenerationSize2, UInt64 TotalPromotedSize2, UInt64 GenerationSize3, UInt64 TotalPromotedSize3, UInt64 FinalizationPromotedSize, UInt64 FinalizationPromotedCount, UInt32 PinnedObjectCount, UInt32 SinkBlockCount, UInt32 GCHandleCount, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[14]; + EventDataDescCreate(&EventData[0], &GenerationSize0, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &TotalPromotedSize0, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &GenerationSize1, sizeof(UInt64)); + EventDataDescCreate(&EventData[3], &TotalPromotedSize1, sizeof(UInt64)); + EventDataDescCreate(&EventData[4], &GenerationSize2, sizeof(UInt64)); + EventDataDescCreate(&EventData[5], 
&TotalPromotedSize2, sizeof(UInt64)); + EventDataDescCreate(&EventData[6], &GenerationSize3, sizeof(UInt64)); + EventDataDescCreate(&EventData[7], &TotalPromotedSize3, sizeof(UInt64)); + EventDataDescCreate(&EventData[8], &FinalizationPromotedSize, sizeof(UInt64)); + EventDataDescCreate(&EventData[9], &FinalizationPromotedCount, sizeof(UInt64)); + EventDataDescCreate(&EventData[10], &PinnedObjectCount, sizeof(UInt32)); + EventDataDescCreate(&EventData[11], &SinkBlockCount, sizeof(UInt32)); + EventDataDescCreate(&EventData[12], &GCHandleCount, sizeof(UInt32)); + EventDataDescCreate(&EventData[13], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 14, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCJoin_V2(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Heap, UInt32 JoinTime, UInt32 JoinType, UInt16 ClrInstanceID, UInt32 JoinID) +{ + EVENT_DATA_DESCRIPTOR EventData[5]; + EventDataDescCreate(&EventData[0], &Heap, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &JoinTime, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &JoinType, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[4], &JoinID, sizeof(UInt32)); + return PalEventWrite(RegHandle, Descriptor, 5, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMark(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 HeapNum, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &HeapNum, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCMarkWithType(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 HeapNum, UInt16 ClrInstanceID, UInt32 Type, UInt64 Bytes) +{ + EVENT_DATA_DESCRIPTOR EventData[4]; + EventDataDescCreate(&EventData[0], &HeapNum, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[2], &Type, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Bytes, sizeof(UInt64)); + return PalEventWrite(RegHandle, Descriptor, 4, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCNoUserData(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[1]; + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 1, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCPerHeapHistory_V3(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt16 ClrInstanceID, void* FreeListAllocated, void* FreeListRejected, void* EndOfSegAllocated, void* CondemnedAllocated, void* PinnedAllocated, void* PinnedAllocatedAdvance, UInt32 RunningFreeListEfficiency, UInt32 CondemnReasons0, UInt32 CondemnReasons1, UInt32 CompactMechanisms, UInt32 ExpandMechanisms, UInt32 HeapIndex, void* ExtraGen0Commit, UInt32 Count, ULONG Values_Len_, const PVOID Values) +{ + EVENT_DATA_DESCRIPTOR EventData[26]; + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[1], &FreeListAllocated, sizeof(void*)); + EventDataDescCreate(&EventData[2], &FreeListRejected, sizeof(void*)); + 
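// The remaining per-heap fields are described the same way (pointer-sized values as void*,
+    // 32-bit counters as UInt32), ending with Count and the variable-length Values blob;
+    // 16 descriptors in total are passed to PalEventWrite below.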
EventDataDescCreate(&EventData[3], &EndOfSegAllocated, sizeof(void*)); + EventDataDescCreate(&EventData[4], &CondemnedAllocated, sizeof(void*)); + EventDataDescCreate(&EventData[5], &PinnedAllocated, sizeof(void*)); + EventDataDescCreate(&EventData[6], &PinnedAllocatedAdvance, sizeof(void*)); + EventDataDescCreate(&EventData[7], &RunningFreeListEfficiency, sizeof(UInt32)); + EventDataDescCreate(&EventData[8], &CondemnReasons0, sizeof(UInt32)); + EventDataDescCreate(&EventData[9], &CondemnReasons1, sizeof(UInt32)); + EventDataDescCreate(&EventData[10], &CompactMechanisms, sizeof(UInt32)); + EventDataDescCreate(&EventData[11], &ExpandMechanisms, sizeof(UInt32)); + EventDataDescCreate(&EventData[12], &HeapIndex, sizeof(UInt32)); + EventDataDescCreate(&EventData[13], &ExtraGen0Commit, sizeof(void*)); + EventDataDescCreate(&EventData[14], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[15], Values, Count * Values_Len_); + return PalEventWrite(RegHandle, Descriptor, 16, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCStart_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Count, UInt32 Depth, UInt32 Reason, UInt32 Type, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[5]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Depth, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Type, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 5, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCStart_V2(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Count, UInt32 Depth, UInt32 Reason, UInt32 Type, UInt16 ClrInstanceID, UInt64 ClientSequenceNumber) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Depth, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Type, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[5], &ClientSequenceNumber, sizeof(UInt64)); + return PalEventWrite(RegHandle, Descriptor, 6, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCSuspendEE_V1(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Reason, UInt32 Count, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[3]; + EventDataDescCreate(&EventData[0], &Reason, sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &Count, sizeof(UInt32)); + EventDataDescCreate(&EventData[2], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 3, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCTerminateConcurrentThread(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[1]; + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 1, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_GCTriggered(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt32 Reason, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[2]; + EventDataDescCreate(&EventData[0], &Reason, 
sizeof(UInt32)); + EventDataDescCreate(&EventData[1], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 2, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_ModuleLoadUnload_V2(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, UInt64 ModuleID, UInt64 AssemblyID, UInt32 ModuleFlags, UInt32 Reserved1, LPCWSTR ModuleILPath, LPCWSTR ModuleNativePath, UInt16 ClrInstanceID, const GUID* ManagedPdbSignature, UInt32 ManagedPdbAge, LPCWSTR ManagedPdbBuildPath, const GUID* NativePdbSignature, UInt32 NativePdbAge, LPCWSTR NativePdbBuildPath) +{ + EVENT_DATA_DESCRIPTOR EventData[13]; + EventDataDescCreate(&EventData[0], &ModuleID, sizeof(UInt64)); + EventDataDescCreate(&EventData[1], &AssemblyID, sizeof(UInt64)); + EventDataDescCreate(&EventData[2], &ModuleFlags, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Reserved1, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], (ModuleILPath != NULL) ? ModuleILPath : L"", (ModuleILPath != NULL) ? (ULONG)((wcslen(ModuleILPath) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[5], (ModuleNativePath != NULL) ? ModuleNativePath : L"", (ModuleNativePath != NULL) ? (ULONG)((wcslen(ModuleNativePath) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[6], &ClrInstanceID, sizeof(UInt16)); + EventDataDescCreate(&EventData[7], ManagedPdbSignature, sizeof(*(ManagedPdbSignature))); + EventDataDescCreate(&EventData[8], &ManagedPdbAge, sizeof(UInt32)); + EventDataDescCreate(&EventData[9], (ManagedPdbBuildPath != NULL) ? ManagedPdbBuildPath : L"", (ManagedPdbBuildPath != NULL) ? (ULONG)((wcslen(ManagedPdbBuildPath) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + EventDataDescCreate(&EventData[10], NativePdbSignature, sizeof(*(NativePdbSignature))); + EventDataDescCreate(&EventData[11], &NativePdbAge, sizeof(UInt32)); + EventDataDescCreate(&EventData[12], (NativePdbBuildPath != NULL) ? NativePdbBuildPath : L"", (NativePdbBuildPath != NULL) ? 
(ULONG)((wcslen(NativePdbBuildPath) + 1) * sizeof(WCHAR)) : (ULONG)sizeof(L"")); + return PalEventWrite(RegHandle, Descriptor, 13, EventData); +} + +RH_ETW_INLINE UInt32 +Template_MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_SetGCHandle(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor, void* HandleID, void* ObjectID, UInt32 Kind, UInt32 Generation, UInt64 AppDomainID, UInt16 ClrInstanceID) +{ + EVENT_DATA_DESCRIPTOR EventData[6]; + EventDataDescCreate(&EventData[0], &HandleID, sizeof(void*)); + EventDataDescCreate(&EventData[1], &ObjectID, sizeof(void*)); + EventDataDescCreate(&EventData[2], &Kind, sizeof(UInt32)); + EventDataDescCreate(&EventData[3], &Generation, sizeof(UInt32)); + EventDataDescCreate(&EventData[4], &AppDomainID, sizeof(UInt64)); + EventDataDescCreate(&EventData[5], &ClrInstanceID, sizeof(UInt16)); + return PalEventWrite(RegHandle, Descriptor, 6, EventData); +} + +RH_ETW_INLINE UInt32 +TemplateEventDescriptor(REGHANDLE RegHandle, const EVENT_DESCRIPTOR * Descriptor) +{ + return PalEventWrite(RegHandle, Descriptor, 0, NULL); +} + +#else // FEATURE_ETW + +#define ETW_EVENT_ENABLED(Context, EventDescriptor) false + +#define FireEtwBGC1stConEnd(ClrInstanceID) +#define FireEtwBGC1stNonConEnd(ClrInstanceID) +#define FireEtwBGC2ndConBegin(ClrInstanceID) +#define FireEtwBGC2ndConEnd(ClrInstanceID) +#define FireEtwBGC2ndNonConBegin(ClrInstanceID) +#define FireEtwBGC2ndNonConEnd(ClrInstanceID) +#define FireEtwBGCAllocWaitBegin(Reason, ClrInstanceID) +#define FireEtwBGCAllocWaitEnd(Reason, ClrInstanceID) +#define FireEtwBGCBegin(ClrInstanceID) +#define FireEtwBGCDrainMark(Objects, ClrInstanceID) +#define FireEtwBGCOverflow(Min, Max, Objects, IsLarge, ClrInstanceID) +#define FireEtwBGCPlanEnd(ClrInstanceID) +#define FireEtwBGCRevisit(Pages, Objects, IsLarge, ClrInstanceID) +#define FireEtwBGCSweepEnd(ClrInstanceID) +#define FireEtwGCFullNotify_V1(GenNumber, IsAlloc, ClrInstanceID) +#define FireEtwGCGlobalHeapHistory_V1(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID) +#define FireEtwGCJoin_V1(Heap, JoinTime, JoinType, ClrInstanceID) +#define FireEtwGCOptimized_V1(DesiredAllocation, NewAllocation, GenerationNumber, ClrInstanceID) +#define FireEtwGCPerHeapHistory() +#define FireEtwGCSettings(SegmentSize, LargeObjectSegmentSize, ServerGC) +#define FireEtwPinPlugAtGCTime(PlugStart, PlugEnd, GapBeforeSize, ClrInstanceID) +#define FireEtwPrvDestroyGCHandle(HandleID, ClrInstanceID) +#define FireEtwPrvGCMarkCards_V1(HeapNum, ClrInstanceID) +#define FireEtwPrvGCMarkFinalizeQueueRoots_V1(HeapNum, ClrInstanceID) +#define FireEtwPrvGCMarkHandles_V1(HeapNum, ClrInstanceID) +#define FireEtwPrvGCMarkStackRoots_V1(HeapNum, ClrInstanceID) +#define FireEtwPrvSetGCHandle(HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) + +#define FireEtwBulkType(Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwDestroyGCHandle(HandleID, ClrInstanceID) +#define FireEtwExceptionThrown_V1(ExceptionType, ExceptionMessage, ExceptionEIP, ExceptionHRESULT, ExceptionFlags, ClrInstanceID) +#define FireEtwGCAllocationTick_V1(AllocationAmount, AllocationKind, ClrInstanceID) +#define FireEtwGCAllocationTick_V2(AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex) +#define FireEtwGCAllocationTick_V3(AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex, Address) +#define FireEtwGCBulkEdge(Index, Count, ClrInstanceID, Values_Len_, Values) +#define 
FireEtwGCBulkMovedObjectRanges(Index, Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkNode(Index, Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkRCW(Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkRootCCW(Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkRootConditionalWeakTableElementEdge(Index, Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkRootEdge(Index, Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCBulkSurvivingObjectRanges(Index, Count, ClrInstanceID, Values_Len_, Values) +#define FireEtwGCCreateConcurrentThread_V1(ClrInstanceID) +#define FireEtwGCCreateSegment_V1(Address, Size, Type, ClrInstanceID) +#define FireEtwGCEnd_V1(Count, Depth, ClrInstanceID) +#define FireEtwGCFreeSegment_V1(Address, ClrInstanceID) +#define FireEtwGCGenerationRange(Generation, RangeStart, RangeUsedLength, RangeReservedLength, ClrInstanceID) +#define FireEtwGCGlobalHeapHistory_V2(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID, PauseMode, MemoryPressure) +#define FireEtwGCHeapStats_V1(GenerationSize0, TotalPromotedSize0, GenerationSize1, TotalPromotedSize1, GenerationSize2, TotalPromotedSize2, GenerationSize3, TotalPromotedSize3, FinalizationPromotedSize, FinalizationPromotedCount, PinnedObjectCount, SinkBlockCount, GCHandleCount, ClrInstanceID) +#define FireEtwGCJoin_V2(Heap, JoinTime, JoinType, ClrInstanceID, JoinID) +#define FireEtwGCMarkFinalizeQueueRoots(HeapNum, ClrInstanceID) +#define FireEtwGCMarkHandles(HeapNum, ClrInstanceID) +#define FireEtwGCMarkOlderGenerationRoots(HeapNum, ClrInstanceID) +#define FireEtwGCMarkStackRoots(HeapNum, ClrInstanceID) +#define FireEtwGCMarkWithType(HeapNum, ClrInstanceID, Type, Bytes) +#define FireEtwGCPerHeapHistory_V3(ClrInstanceID, FreeListAllocated, FreeListRejected, EndOfSegAllocated, CondemnedAllocated, PinnedAllocated, PinnedAllocatedAdvance, RunningFreeListEfficiency, CondemnReasons0, CondemnReasons1, CompactMechanisms, ExpandMechanisms, HeapIndex, ExtraGen0Commit, Count, Values_Len_, Values) +#define FireEtwGCRestartEEBegin_V1(ClrInstanceID) +#define FireEtwGCRestartEEEnd_V1(ClrInstanceID) +#define FireEtwGCStart_V1(Count, Depth, Reason, Type, ClrInstanceID) +#define FireEtwGCStart_V2(Count, Depth, Reason, Type, ClrInstanceID, ClientSequenceNumber) +#define FireEtwGCSuspendEEBegin_V1(Reason, Count, ClrInstanceID) +#define FireEtwGCSuspendEEEnd_V1(ClrInstanceID) +#define FireEtwGCTerminateConcurrentThread_V1(ClrInstanceID) +#define FireEtwGCTriggered(Reason, ClrInstanceID) +#define FireEtwModuleLoad_V2(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID, ManagedPdbSignature, ManagedPdbAge, ManagedPdbBuildPath, NativePdbSignature, NativePdbAge, NativePdbBuildPath) +#define FireEtwSetGCHandle(HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) + +#endif // FEATURE_ETW + +#endif // !__RH_ETW_DEFS_INCLUDED diff --git a/src/coreclr/src/nativeaot/Runtime/FinalizerHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/FinalizerHelpers.cpp new file mode 100644 index 0000000000000..84104ab005ded --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/FinalizerHelpers.cpp @@ -0,0 +1,262 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Unmanaged helpers called by the managed finalizer thread. 
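+// FinalizerStart below is the unmanaged thread entry point: it attaches the thread to the runtime, waits for the first finalization request, and then hands control to the managed ProcessFinalizers loop.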
+// +#include "common.h" +#include "gcenv.h" +#include "gcheaputilities.h" + +#include "slist.h" +#include "gcrhinterface.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "shash.h" + +#include "regdisplay.h" +#include "StackFrameIterator.h" + +#include "thread.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" + +#include "yieldprocessornormalized.h" + +GPTR_DECL(Thread, g_pFinalizerThread); + +CLREventStatic g_FinalizerEvent; +CLREventStatic g_FinalizerDoneEvent; + +// Finalizer method implemented by redhawkm. +extern "C" void __cdecl ProcessFinalizers(); + +// Unmanaged front-end to the finalizer thread. We require this because at the point the GC creates the +// finalizer thread we're still executing the DllMain for RedhawkU. At that point we can't run managed code +// successfully (in particular module initialization code has not run for RedhawkM). Instead this method waits +// for the first finalization request (by which time everything must be up and running) and kicks off the +// managed portion of the thread at that point. +UInt32 WINAPI FinalizerStart(void* pContext) +{ + HANDLE hFinalizerEvent = (HANDLE)pContext; + + ThreadStore::AttachCurrentThread(); + Thread * pThread = ThreadStore::GetCurrentThread(); + + // Disallow gcstress on this thread to work around the current implementation's limitation that it will + // get into an infinite loop if performed on the finalizer thread. + pThread->SetSuppressGcStress(); + + g_pFinalizerThread = PTR_Thread(pThread); + + // We have some time until the first finalization request - use the time to calibrate normalized waits. + EnsureYieldProcessorNormalizedInitialized(); + + // Wait for a finalization request. + UInt32 uResult = PalWaitForSingleObjectEx(hFinalizerEvent, INFINITE, FALSE); + ASSERT(uResult == WAIT_OBJECT_0); + + // Since we just consumed the request (and the event is auto-reset) we must set the event again so the + // managed finalizer code will immediately start processing the queue when we run it. + UInt32_BOOL fResult = PalSetEvent(hFinalizerEvent); + ASSERT(fResult); + + // Run the managed portion of the finalizer. Until we implement (non-process) shutdown this call will + // never return. + + ProcessFinalizers(); + + ASSERT(!"Finalizer thread should never return"); + return 0; +} + +bool RhStartFinalizerThread() +{ +#ifdef APP_LOCAL_RUNTIME + + // + // On app-local runtimes, if we're running with the fallback PAL code (meaning we don't have IManagedRuntimeServices) + // then we use the WinRT ThreadPool to create the finalizer thread. This might fail at startup, if the current thread + // hasn't been CoInitialized. So we need to retry this later. We use fFinalizerThreadCreated to track whether we've + // successfully created the finalizer thread yet, and also as a sort of lock to make sure two threads don't try + // to create the finalizer thread at the same time. + // + static volatile Int32 fFinalizerThreadCreated; + + if (Interlocked::Exchange(&fFinalizerThreadCreated, 1) != 1) + { + if (!PalStartFinalizerThread(FinalizerStart, (void*)g_FinalizerEvent.GetOSEvent())) + { + // Need to try again another time... + Interlocked::Exchange(&fFinalizerThreadCreated, 0); + } + } + + // We always return true, so the GC can start even if we failed. + return true; + +#else // APP_LOCAL_RUNTIME + + // + // If this isn't an app-local runtime, then the PAL will just call CreateThread directly, which should succeed + // under normal circumstances. 
+ // + if (PalStartFinalizerThread(FinalizerStart, (void*)g_FinalizerEvent.GetOSEvent())) + return true; + else + return false; + +#endif // APP_LOCAL_RUNTIME +} + +bool RhInitializeFinalization() +{ + // Allocate the events the GC expects the finalizer thread to have. The g_FinalizerEvent event is signalled + // by the GC whenever it completes a collection where it found otherwise unreachable finalizable objects. + // The g_FinalizerDoneEvent is set by the finalizer thread every time it wakes up and drains the + // queue of finalizable objects. It's mainly used by GC.WaitForPendingFinalizers(). + if (!g_FinalizerEvent.CreateAutoEventNoThrow(false)) + return false; + if (!g_FinalizerDoneEvent.CreateManualEventNoThrow(false)) + return false; + + // Create the finalizer thread itself. + if (!RhStartFinalizerThread()) + return false; + + return true; +} + +void RhEnableFinalization() +{ + g_FinalizerEvent.Set(); +} + +EXTERN_C REDHAWK_API void __cdecl RhInitializeFinalizerThread() +{ +#ifdef APP_LOCAL_RUNTIME + // We may have failed to create the finalizer thread at startup. + // Try again now. + RhStartFinalizerThread(); +#endif + + g_FinalizerEvent.Set(); +} + +EXTERN_C REDHAWK_API void __cdecl RhWaitForPendingFinalizers(UInt32_BOOL allowReentrantWait) +{ + // This must be called via p/invoke rather than RuntimeImport since it blocks and could starve the GC if + // called in cooperative mode. + ASSERT(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode()); + + // Can't call this from the finalizer thread itself. + if (ThreadStore::GetCurrentThread() != g_pFinalizerThread) + { + // Clear any current indication that a finalization pass is finished and wake the finalizer thread up + // (if there's no work to do it'll set the done event immediately). + g_FinalizerDoneEvent.Reset(); + g_FinalizerEvent.Set(); + +#ifdef APP_LOCAL_RUNTIME + // We may have failed to create the finalizer thread at startup. + // Try again now. + RhStartFinalizerThread(); +#endif + + // Wait for the finalizer thread to get back to us. + g_FinalizerDoneEvent.Wait(INFINITE, false, allowReentrantWait); + } +} + +// Block the current thread until at least one object needs to be finalized (returns true) or memory is low +// (returns false and the finalizer thread should initiate a garbage collection). +EXTERN_C REDHAWK_API UInt32_BOOL __cdecl RhpWaitForFinalizerRequest() +{ + // We can wait for two events; finalization queue has been populated and low memory resource notification. + // But if the latter is signalled we shouldn't wait on it again immediately -- if the garbage collection + // the finalizer thread initiates as a result is not sufficient to remove the low memory condition the + // event will still be signalled and we'll end up looping doing cpu intensive collections, which won't + // help the situation at all and could make it worse. So we remember whether the last event we reported + // was low memory and if so we'll wait at least two seconds (the CLR value) on just a finalization + // request. + static bool fLastEventWasLowMemory = false; + + IGCHeap * pHeap = GCHeapUtilities::GetGCHeap(); + + // Wait in a loop because we may have to retry if we decide to only wait for finalization events but the + // two second timeout expires. 
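+ // Each pass below either reports pending finalization work (WAIT_OBJECT_0), reports low memory so the finalizer can trigger a collection (WAIT_OBJECT_0 + 1), or times out and goes back to waiting on both events.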
+ do + { + HANDLE lowMemEvent = NULL; +#if 0 // TODO: hook up low memory notification + lowMemEvent = pHeap->GetLowMemoryNotificationEvent(); + HANDLE rgWaitHandles[] = { g_FinalizerEvent.GetOSEvent(), lowMemEvent }; + UInt32 cWaitHandles = (fLastEventWasLowMemory || (lowMemEvent == NULL)) ? 1 : 2; + UInt32 uTimeout = fLastEventWasLowMemory ? 2000 : INFINITE; + + UInt32 uResult = PalWaitForMultipleObjectsEx(cWaitHandles, rgWaitHandles, FALSE, uTimeout, FALSE); +#else + UInt32 uResult = PalWaitForSingleObjectEx(g_FinalizerEvent.GetOSEvent(), INFINITE, FALSE); +#endif + + switch (uResult) + { + case WAIT_OBJECT_0: + // At least one object is ready for finalization. + return TRUE; + + case WAIT_OBJECT_0 + 1: + // Memory is low, tell the finalizer thread to garbage collect. + ASSERT(!fLastEventWasLowMemory); + fLastEventWasLowMemory = true; + return FALSE; + + case WAIT_TIMEOUT: + // We were waiting only for finalization events but didn't get one within the timeout period. Go + // back to waiting for any event. + ASSERT(fLastEventWasLowMemory); + fLastEventWasLowMemory = false; + break; + + default: + ASSERT(!"Unexpected PalWaitForMultipleObjectsEx() result"); + return FALSE; + } + } while (true); +} + +// Indicate that the current round of finalizations is complete. +EXTERN_C REDHAWK_API void __cdecl RhpSignalFinalizationComplete() +{ + g_FinalizerDoneEvent.Set(); +} + +// +// The following helpers are special in that they interact with internal GC state or directly manipulate +// managed references so they're called with a special co-operative p/invoke. +// + +// Fetch next object which needs finalization or return null if we've reached the end of the list. +COOP_PINVOKE_HELPER(OBJECTREF, RhpGetNextFinalizableObject, ()) +{ + while (true) + { + // Get the next finalizable object. If we get back NULL we've reached the end of the list. + OBJECTREF refNext = GCHeapUtilities::GetGCHeap()->GetNextFinalizable(); + if (refNext == NULL) + return NULL; + + // The queue may contain objects which have been marked as finalized already (via GC.SuppressFinalize() + // for instance). Skip finalization for these but reset the flag so that the object can be put back on + // the list with RegisterForFinalization(). + if (refNext->GetHeader()->GetBits() & BIT_SBLK_FINALIZER_RUN) + { + refNext->GetHeader()->ClrBit(BIT_SBLK_FINALIZER_RUN); + continue; + } + + // We've found the first finalizable object, return it to the caller. + return refNext; + } +} diff --git a/src/coreclr/src/nativeaot/Runtime/GCHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/GCHelpers.cpp new file mode 100644 index 0000000000000..66f04a1b0a2c5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GCHelpers.cpp @@ -0,0 +1,433 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Unmanaged helpers exposed by the System.GC managed class. +// + +#include "common.h" +#include "gcenv.h" +#include "gcenv.ee.h" +#include "gcheaputilities.h" +#include "RestrictedCallouts.h" + +#include "gcrhinterface.h" + +#include "PalRedhawkCommon.h" +#include "slist.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" + +#include "thread.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" + +EXTERN_C REDHAWK_API void __cdecl RhpCollect(UInt32 uGeneration, UInt32 uMode) +{ + // This must be called via p/invoke rather than RuntimeImport to make the stack crawlable. 
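+ // SetupHackPInvokeTunnel publishes a pinvoke transition frame for this thread and DisablePreemptiveMode switches it into cooperative mode before calling into the GC; EnablePreemptiveMode below restores preemptive mode once the collection completes.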
+ + Thread * pCurThread = ThreadStore::GetCurrentThread(); + + pCurThread->SetupHackPInvokeTunnel(); + pCurThread->DisablePreemptiveMode(); + + ASSERT(!pCurThread->IsDoNotTriggerGcSet()); + GCHeapUtilities::GetGCHeap()->GarbageCollect(uGeneration, FALSE, uMode); + + pCurThread->EnablePreemptiveMode(); +} + +EXTERN_C REDHAWK_API Int64 __cdecl RhpGetGcTotalMemory() +{ + // This must be called via p/invoke rather than RuntimeImport to make the stack crawlable. + + Thread * pCurThread = ThreadStore::GetCurrentThread(); + + pCurThread->SetupHackPInvokeTunnel(); + pCurThread->DisablePreemptiveMode(); + + Int64 ret = GCHeapUtilities::GetGCHeap()->GetTotalBytesInUse(); + + pCurThread->EnablePreemptiveMode(); + + return ret; +} + +EXTERN_C REDHAWK_API Int32 __cdecl RhpStartNoGCRegion(Int64 totalSize, Boolean hasLohSize, Int64 lohSize, Boolean disallowFullBlockingGC) +{ + Thread *pCurThread = ThreadStore::GetCurrentThread(); + ASSERT(!pCurThread->IsCurrentThreadInCooperativeMode()); + + pCurThread->SetupHackPInvokeTunnel(); + pCurThread->DisablePreemptiveMode(); + + int result = GCHeapUtilities::GetGCHeap()->StartNoGCRegion(totalSize, hasLohSize, lohSize, disallowFullBlockingGC); + + pCurThread->EnablePreemptiveMode(); + + return result; +} + +EXTERN_C REDHAWK_API Int32 __cdecl RhpEndNoGCRegion() +{ + ASSERT(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode()); + + return GCHeapUtilities::GetGCHeap()->EndNoGCRegion(); +} + +COOP_PINVOKE_HELPER(void, RhSuppressFinalize, (OBJECTREF refObj)) +{ + if (!refObj->get_EEType()->HasFinalizer()) + return; + GCHeapUtilities::GetGCHeap()->SetFinalizationRun(refObj); +} + +COOP_PINVOKE_HELPER(Boolean, RhReRegisterForFinalize, (OBJECTREF refObj)) +{ + if (!refObj->get_EEType()->HasFinalizer()) + return Boolean_true; + return GCHeapUtilities::GetGCHeap()->RegisterForFinalization(-1, refObj) ? Boolean_true : Boolean_false; +} + +COOP_PINVOKE_HELPER(Int32, RhGetMaxGcGeneration, ()) +{ + return GCHeapUtilities::GetGCHeap()->GetMaxGeneration(); +} + +COOP_PINVOKE_HELPER(Int32, RhGetGcCollectionCount, (Int32 generation, Boolean getSpecialGCCount)) +{ + return GCHeapUtilities::GetGCHeap()->CollectionCount(generation, getSpecialGCCount); +} + +COOP_PINVOKE_HELPER(Int32, RhGetGeneration, (OBJECTREF obj)) +{ + return GCHeapUtilities::GetGCHeap()->WhichGeneration(obj); +} + +COOP_PINVOKE_HELPER(Int32, RhGetGcLatencyMode, ()) +{ + return GCHeapUtilities::GetGCHeap()->GetGcLatencyMode(); +} + +COOP_PINVOKE_HELPER(Int32, RhSetGcLatencyMode, (Int32 newLatencyMode)) +{ + return GCHeapUtilities::GetGCHeap()->SetGcLatencyMode(newLatencyMode); +} + +COOP_PINVOKE_HELPER(Boolean, RhIsServerGc, ()) +{ + return GCHeapUtilities::IsServerHeap(); +} + +COOP_PINVOKE_HELPER(Boolean, RhRegisterGcCallout, (GcRestrictedCalloutKind eKind, void * pCallout)) +{ + return RestrictedCallouts::RegisterGcCallout(eKind, pCallout); +} + +COOP_PINVOKE_HELPER(void, RhUnregisterGcCallout, (GcRestrictedCalloutKind eKind, void * pCallout)) +{ + RestrictedCallouts::UnregisterGcCallout(eKind, pCallout); +} + +COOP_PINVOKE_HELPER(Boolean, RhIsPromoted, (OBJECTREF obj)) +{ + return GCHeapUtilities::GetGCHeap()->IsPromoted(obj) ? 
Boolean_true : Boolean_false; +} + +COOP_PINVOKE_HELPER(Int32, RhGetLohCompactionMode, ()) +{ + return GCHeapUtilities::GetGCHeap()->GetLOHCompactionMode(); +} + +COOP_PINVOKE_HELPER(void, RhSetLohCompactionMode, (Int32 newLohCompactionMode)) +{ + GCHeapUtilities::GetGCHeap()->SetLOHCompactionMode(newLohCompactionMode); +} + +COOP_PINVOKE_HELPER(Int64, RhGetCurrentObjSize, ()) +{ + return GCHeapUtilities::GetGCHeap()->GetCurrentObjSize(); +} + +COOP_PINVOKE_HELPER(Int64, RhGetGCNow, ()) +{ + return GCHeapUtilities::GetGCHeap()->GetNow(); +} + +COOP_PINVOKE_HELPER(Int64, RhGetLastGCStartTime, (Int32 generation)) +{ + return GCHeapUtilities::GetGCHeap()->GetLastGCStartTime(generation); +} + +COOP_PINVOKE_HELPER(Int64, RhGetLastGCDuration, (Int32 generation)) +{ + return GCHeapUtilities::GetGCHeap()->GetLastGCDuration(generation); +} + +COOP_PINVOKE_HELPER(Boolean, RhRegisterForFullGCNotification, (Int32 maxGenerationThreshold, Int32 largeObjectHeapThreshold)) +{ + ASSERT(maxGenerationThreshold >= 1 && maxGenerationThreshold <= 99); + ASSERT(largeObjectHeapThreshold >= 1 && largeObjectHeapThreshold <= 99); + return GCHeapUtilities::GetGCHeap()->RegisterForFullGCNotification(maxGenerationThreshold, largeObjectHeapThreshold) + ? Boolean_true : Boolean_false; +} + +COOP_PINVOKE_HELPER(Boolean, RhCancelFullGCNotification, ()) +{ + return GCHeapUtilities::GetGCHeap()->CancelFullGCNotification() ? Boolean_true : Boolean_false; +} + +COOP_PINVOKE_HELPER(Int32, RhWaitForFullGCApproach, (Int32 millisecondsTimeout)) +{ + ASSERT(millisecondsTimeout >= -1); + ASSERT(ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode()); + + int timeout = millisecondsTimeout == -1 ? INFINITE : millisecondsTimeout; + return GCHeapUtilities::GetGCHeap()->WaitForFullGCApproach(millisecondsTimeout); +} + +COOP_PINVOKE_HELPER(Int32, RhWaitForFullGCComplete, (Int32 millisecondsTimeout)) +{ + ASSERT(millisecondsTimeout >= -1); + ASSERT(ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode()); + + int timeout = millisecondsTimeout == -1 ? INFINITE : millisecondsTimeout; + return GCHeapUtilities::GetGCHeap()->WaitForFullGCComplete(millisecondsTimeout); +} + +COOP_PINVOKE_HELPER(Int64, RhGetGCSegmentSize, ()) +{ + size_t first = GCHeapUtilities::GetGCHeap()->GetValidSegmentSize(Boolean_true); + size_t second = GCHeapUtilities::GetGCHeap()->GetValidSegmentSize(Boolean_false); + + return (first > second) ? 
first : second; +} + +COOP_PINVOKE_HELPER(Int64, RhGetAllocatedBytesForCurrentThread, ()) +{ + Thread *pThread = ThreadStore::GetCurrentThread(); + gc_alloc_context *ac = pThread->GetAllocContext(); + Int64 currentAllocated = ac->alloc_bytes + ac->alloc_bytes_uoh - (ac->alloc_limit - ac->alloc_ptr); + return currentAllocated; +} + +struct RH_GC_GENERATION_INFO +{ + UInt64 sizeBefore; + UInt64 fragmentationBefore; + UInt64 sizeAfter; + UInt64 fragmentationAfter; +}; + +#if defined(TARGET_X86) && !defined(TARGET_UNIX) +#include "pshpack4.h" +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4121) // alignment of a member was sensitive to packing +#endif +#endif +struct RH_GH_MEMORY_INFO +{ +public: + UInt64 highMemLoadThresholdBytes; + UInt64 totalAvailableMemoryBytes; + UInt64 lastRecordedMemLoadBytes; + UInt64 lastRecordedHeapSizeBytes; + UInt64 lastRecordedFragmentationBytes; + UInt64 totalCommittedBytes; + UInt64 promotedBytes; + UInt64 pinnedObjectCount; + UInt64 finalizationPendingCount; + UInt64 index; + UInt32 generation; + UInt32 pauseTimePercent; + UInt8 isCompaction; + UInt8 isConcurrent; + RH_GC_GENERATION_INFO generationInfo0; + RH_GC_GENERATION_INFO generationInfo1; + RH_GC_GENERATION_INFO generationInfo2; + RH_GC_GENERATION_INFO generationInfo3; + RH_GC_GENERATION_INFO generationInfo4; + UInt64 pauseDuration0; + UInt64 pauseDuration1; +}; +#if defined(TARGET_X86) && !defined(TARGET_UNIX) +#ifdef _MSC_VER +#pragma warning(pop) +#endif +#include "poppack.h" +#endif + +COOP_PINVOKE_HELPER(void, RhGetMemoryInfo, (RH_GH_MEMORY_INFO* pData, int kind)) +{ + UInt64* genInfoRaw = (UInt64*)&(pData->generationInfo0); + UInt64* pauseInfoRaw = (UInt64*)&(pData->pauseDuration0); + + return GCHeapUtilities::GetGCHeap()->GetMemoryInfo( + &(pData->highMemLoadThresholdBytes), + &(pData->totalAvailableMemoryBytes), + &(pData->lastRecordedMemLoadBytes), + &(pData->lastRecordedHeapSizeBytes), + &(pData->lastRecordedFragmentationBytes), + &(pData->totalCommittedBytes), + &(pData->promotedBytes), + &(pData->pinnedObjectCount), + &(pData->finalizationPendingCount), + &(pData->index), + &(pData->generation), + &(pData->pauseTimePercent), + (bool*)&(pData->isCompaction), + (bool*)&(pData->isConcurrent), + genInfoRaw, + pauseInfoRaw, + kind); +} + +COOP_PINVOKE_HELPER(Int64, RhGetTotalAllocatedBytes, ()) +{ + uint64_t allocated_bytes = GCHeapUtilities::GetGCHeap()->GetTotalAllocatedBytes() - RedhawkGCInterface::GetDeadThreadsNonAllocBytes(); + + // highest reported allocated_bytes. We do not want to report a value less than that even if unused_bytes has increased. 
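+ // The watermark is advanced with a compare-exchange loop: if another thread publishes a larger value first, the loop retries against that value and ultimately reports whichever is larger.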
+ static uint64_t high_watermark; + + uint64_t current_high = high_watermark; + while (allocated_bytes > current_high) + { + uint64_t orig = PalInterlockedCompareExchange64((Int64*)&high_watermark, allocated_bytes, current_high); + if (orig == current_high) + return allocated_bytes; + + current_high = orig; + } + + return current_high; +} + +EXTERN_C REDHAWK_API Int64 __cdecl RhGetTotalAllocatedBytesPrecise() +{ + Int64 allocated; + + // We need to suspend/restart the EE to get each thread's + // non-allocated memory from their allocation contexts + + GCToEEInterface::SuspendEE(SUSPEND_REASON::SUSPEND_FOR_GC); + + allocated = GCHeapUtilities::GetGCHeap()->GetTotalAllocatedBytes() - RedhawkGCInterface::GetDeadThreadsNonAllocBytes(); + + FOREACH_THREAD(pThread) + { + gc_alloc_context* ac = pThread->GetAllocContext(); + allocated -= ac->alloc_limit - ac->alloc_ptr; + } + END_FOREACH_THREAD + + GCToEEInterface::RestartEE(true); + + return allocated; +} + +static Array* AllocateNewArrayImpl(Thread* pThread, EEType* pArrayEEType, UInt32 numElements, UInt32 flags) +{ + size_t size; +#ifndef HOST_64BIT + // if the element count is <= 0x10000, no overflow is possible because the component size is + // <= 0xffff, and thus the product is <= 0xffff0000, and the base size is only ~12 bytes + if (numElements > 0x10000) + { + // Perform the size computation using 64-bit integeres to detect overflow + uint64_t size64 = (uint64_t)pArrayEEType->get_BaseSize() + ((uint64_t)numElements * (uint64_t)pArrayEEType->get_ComponentSize()); + size64 = (size64 + (sizeof(UIntNative) - 1)) & ~(sizeof(UIntNative) - 1); + + size = (size_t)size64; + if (size != size64) + { + return NULL; + } + } + else +#endif // !HOST_64BIT + { + size = (size_t)pArrayEEType->get_BaseSize() + ((size_t)numElements * (size_t)pArrayEEType->get_ComponentSize()); + size = ALIGN_UP(size, sizeof(UIntNative)); + } + + size_t max_object_size; +#ifdef HOST_64BIT + if (g_pConfig->GetGCAllowVeryLargeObjects()) + { + max_object_size = (INT64_MAX - 7 - min_obj_size); + } + else +#endif // HOST_64BIT + { + max_object_size = (INT32_MAX - 7 - min_obj_size); + } + + if (size >= max_object_size) + { + return NULL; + } + + const int MaxArrayLength = 0x7FEFFFFF; + const int MaxByteArrayLength = 0x7FFFFFC7; + + // Impose limits on maximum array length in each dimension to allow efficient + // implementation of advanced range check elimination in future. We have to allow + // higher limit for array of bytes (or one byte structs) for backward compatibility. + // Keep in sync with Array.MaxArrayLength in BCL. + if (size > MaxByteArrayLength /* note: comparing allocation size with element count */) + { + // Ensure the above if check covers the minimal interesting size + static_assert(MaxByteArrayLength < (uint64_t)MaxArrayLength * 2, ""); + + if (pArrayEEType->get_ComponentSize() != 1) + { + size_t elementCount = (size - pArrayEEType->get_BaseSize()) / pArrayEEType->get_ComponentSize(); + if (elementCount > MaxArrayLength) + return NULL; + } + else + { + size_t elementCount = size - pArrayEEType->get_BaseSize(); + if (elementCount > MaxByteArrayLength) + return NULL; + } + } + + if (size > RH_LARGE_OBJECT_SIZE) + flags |= GC_ALLOC_LARGE_OBJECT_HEAP; + + // Save the EEType for instrumentation purposes. 
+ RedhawkGCInterface::SetLastAllocEEType(pArrayEEType); + + Array* pArray = (Array*)GCHeapUtilities::GetGCHeap()->Alloc(pThread->GetAllocContext(), size, flags); + if (pArray == NULL) + { + return NULL; + } + + pArray->set_EEType(pArrayEEType); + pArray->InitArrayLength(numElements); + + if (size >= RH_LARGE_OBJECT_SIZE) + GCHeapUtilities::GetGCHeap()->PublishObject((uint8_t*)pArray); + + return pArray; +} + +EXTERN_C REDHAWK_API void RhAllocateNewArray(EEType* pArrayEEType, UInt32 numElements, UInt32 flags, Array** pResult) +{ + Thread* pThread = ThreadStore::GetCurrentThread(); + + pThread->SetupHackPInvokeTunnel(); + pThread->DisablePreemptiveMode(); + + ASSERT(!pThread->IsDoNotTriggerGcSet()); + + *pResult = AllocateNewArrayImpl(pThread, pArrayEEType, numElements, flags); + + pThread->EnablePreemptiveMode(); +} diff --git a/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.cpp new file mode 100644 index 0000000000000..66bc74d4e730e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.cpp @@ -0,0 +1,131 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Unmanaged GC memory helpers +// + +#include "common.h" +#include "gcenv.h" +#include "PalRedhawkCommon.h" +#include "CommonMacros.inl" + +#include "GCMemoryHelpers.h" +#include "GCMemoryHelpers.inl" + +// This function clears a piece of memory in a GC safe way. It makes the guarantee that it will clear memory in at +// least pointer sized chunks whenever possible. Unaligned memory at the beginning and remaining bytes at the end are +// written bytewise. We must make this guarantee whenever we clear memory in the GC heap that could contain object +// references. The GC or other user threads can read object references at any time, clearing them bytewise can result +// in a read on another thread getting incorrect data. +// +// USAGE: The caller is responsible for hoisting any null reference exceptions to a place where the hardware exception +// can be properly translated to a managed exception. +COOP_PINVOKE_CDECL_HELPER(void *, RhpInitMultibyte, (void * mem, int c, size_t size)) +{ + // The caller must do the null-check because we cannot take an AV in the runtime and translate it to managed. + ASSERT(mem != nullptr); + + UIntNative bv = (UInt8)c; + UIntNative pv = 0; + + if (bv != 0) + { + pv = +#if (POINTER_SIZE == 8) + bv << 7*8 | bv << 6*8 | bv << 5*8 | bv << 4*8 | +#endif + bv << 3*8 | bv << 2*8 | bv << 1*8 | bv; + } + + InlineGCSafeFillMemory(mem, size, pv); + + // memset returns the destination buffer + return mem; +} + + +// This is a GC-safe variant of memcpy. It guarantees that the object references in the GC heap are updated atomically. +// This is required for type safety and proper operation of the background GC. +// +// USAGE: 1) The caller is responsible for performing the appropriate bulk write barrier. +// 2) The caller is responsible for hoisting any null reference exceptions to a place where the hardware +// exception can be properly translated to a managed exception. This is handled by RhpCopyMultibyte. +// 3) The caller must ensure that all three parameters are pointer-size-aligned. This should be the case for +// value types which contain GC refs anyway, so if you want to copy structs without GC refs which might be +// unaligned, then you must use RhpCopyMultibyteNoGCRefs. 
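+// As an illustration only (a sketch, not taken from these sources): a caller that copies a pointer-aligned
+// struct containing GC refs with this helper is expected to issue the barrier itself, e.g.
+//     memcpyGCRefs(pDest, pSrc, cbStruct);          // GC-safe, pointer-sized chunk copy
+//     RhpBulkWriteBarrier(pDest, (UInt32)cbStruct); // caller-supplied bulk write barrier
+// whereas memcpyGCRefsWithWriteBarrier below performs both steps itself.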
+COOP_PINVOKE_CDECL_HELPER(void *, memcpyGCRefs, (void * dest, const void *src, size_t len)) +{ + // null pointers are not allowed (they are checked by RhpCopyMultibyte) + ASSERT(dest != nullptr); + ASSERT(src != nullptr); + + InlineForwardGCSafeCopy(dest, src, len); + + // memcpy returns the destination buffer + return dest; +} + +// This is a GC-safe variant of memcpy. It guarantees that the object references in the GC heap are updated atomically. +// This is required for type safety and proper operation of the background GC. +// Writebarrier is included. +// +// USAGE: +// 1) The caller is responsible for hoisting any null reference exceptions to a place where the hardware +// exception can be properly translated to a managed exception. This is handled by RhpCopyMultibyte. +// 2) The caller must ensure that all three parameters are pointer-size-aligned. This should be the case for +// value types which contain GC refs anyway, so if you want to copy structs without GC refs which might be +// unaligned, then you must use RhpCopyMultibyteNoGCRefs. +COOP_PINVOKE_CDECL_HELPER(void *, memcpyGCRefsWithWriteBarrier, (void * dest, const void *src, size_t len)) +{ + // null pointers are not allowed (they are checked by RhpCopyMultibyteWithWriteBarrier) + ASSERT(dest != nullptr); + ASSERT(src != nullptr); + + InlineForwardGCSafeCopy(dest, src, len); + InlinedBulkWriteBarrier(dest, len); + + // memcpy returns the destination buffer + return dest; +} + +// Same as memcpyGCRefsWithWriteBarrier, except it checks if memory might contain GC pointers +// and if so dispatches to memcpyGCRefsWithWriteBarrier and if not uses traditional memcpy +COOP_PINVOKE_CDECL_HELPER(void *, memcpyAnyWithWriteBarrier, (void * dest, const void *src, size_t len)) +{ + // null pointers are not allowed (they are checked by RhpCopyMultibyteWithWriteBarrier) + ASSERT(dest != nullptr); + ASSERT(src != nullptr); + + // Use GC safe copy whenever there might be GC pointers + if (IS_ALIGNED(dest, sizeof(size_t)) && IS_ALIGNED(src, sizeof(size_t)) && IS_ALIGNED(len, sizeof(size_t))) + { + return memcpyGCRefsWithWriteBarrier(dest, src, len); + } + + return memcpy(dest, src, len); +} + +// Move memory, in a way that is compatible with a move onto the heap, but +// does not require the destination pointer to be on the heap. + +COOP_PINVOKE_HELPER(void, RhBulkMoveWithWriteBarrier, (uint8_t* pDest, uint8_t* pSrc, size_t cbDest)) +{ + if (pDest <= pSrc || pSrc + cbDest <= pDest) + InlineForwardGCSafeCopy(pDest, pSrc, cbDest); + else + InlineBackwardGCSafeCopy(pDest, pSrc, cbDest); + + InlinedBulkWriteBarrier(pDest, cbDest); +} + +void GCSafeCopyMemoryWithWriteBarrier(void * dest, const void *src, size_t len) +{ + InlineForwardGCSafeCopy(dest, src, len); + InlinedBulkWriteBarrier(dest, len); +} + +void REDHAWK_CALLCONV RhpBulkWriteBarrier(void* pMemStart, UInt32 cbMemSize) +{ + InlinedBulkWriteBarrier(pMemStart, cbMemSize); +} diff --git a/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.h b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.h new file mode 100644 index 0000000000000..3d74bd3fa498a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.h @@ -0,0 +1,10 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// +// Unmanaged GC memory helpers +// + +void GCSafeCopyMemoryWithWriteBarrier(void * dest, const void *src, size_t len); + +EXTERN_C void REDHAWK_CALLCONV RhpBulkWriteBarrier(void* pMemStart, UInt32 cbMemSize); diff --git a/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.inl b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.inl new file mode 100644 index 0000000000000..5d973bc70edbb --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GCMemoryHelpers.inl @@ -0,0 +1,255 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "volatile.h" + +// +// Unmanaged GC memory helpers +// + +// This function fills a piece of memory in a GC safe way. It makes the guarantee +// that it will fill memory in at least pointer sized chunks whenever possible. +// Unaligned memory at the beginning and remaining bytes at the end are written bytewise. +// We must make this guarantee whenever we clear memory in the GC heap that could contain +// object references. The GC or other user threads can read object references at any time, +// clearing them bytewise can result in a read on another thread getting incorrect data. +FORCEINLINE void InlineGCSafeFillMemory(void * mem, size_t size, size_t pv) +{ + UInt8 * memBytes = (UInt8 *)mem; + UInt8 * endBytes = &memBytes[size]; + + // handle unaligned bytes at the beginning + while (!IS_ALIGNED(memBytes, sizeof(void *)) && (memBytes < endBytes)) + *memBytes++ = (UInt8)pv; + + // now write pointer sized pieces + // volatile ensures that this doesn't get optimized back into a memset call + size_t nPtrs = (endBytes - memBytes) / sizeof(void *); + volatile UIntNative* memPtr = (UIntNative*)memBytes; + for (size_t i = 0; i < nPtrs; i++) + *memPtr++ = pv; + + // handle remaining bytes at the end + memBytes = (UInt8*)memPtr; + while (memBytes < endBytes) + *memBytes++ = (UInt8)pv; +} + +// These functions copy memory in a GC safe way. They make the guarantee +// that the memory is copied in at least pointer sized chunks. 
+ +FORCEINLINE void InlineForwardGCSafeCopy(void * dest, const void *src, size_t len) +{ + // All parameters must be pointer-size-aligned + ASSERT(IS_ALIGNED(dest, sizeof(size_t))); + ASSERT(IS_ALIGNED(src, sizeof(size_t))); + ASSERT(IS_ALIGNED(len, sizeof(size_t))); + + size_t size = len; + UInt8 * dmem = (UInt8 *)dest; + UInt8 * smem = (UInt8 *)src; + + // regions must be non-overlapping + ASSERT(dmem <= smem || smem + size <= dmem); + + // copy 4 pointers at a time + while (size >= 4 * sizeof(size_t)) + { + size -= 4 * sizeof(size_t); + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + ((size_t *)dmem)[1] = ((size_t *)smem)[1]; + ((size_t *)dmem)[2] = ((size_t *)smem)[2]; + ((size_t *)dmem)[3] = ((size_t *)smem)[3]; + smem += 4 * sizeof(size_t); + dmem += 4 * sizeof(size_t); + } + + // copy 2 trailing pointers, if needed + if ((size & (2 * sizeof(size_t))) != 0) + { + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + ((size_t *)dmem)[1] = ((size_t *)smem)[1]; + smem += 2 * sizeof(size_t); + dmem += 2 * sizeof(size_t); + } + + // finish with one pointer, if needed + if ((size & sizeof(size_t)) != 0) + { + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + } +} + +FORCEINLINE void InlineBackwardGCSafeCopy(void * dest, const void *src, size_t len) +{ + // All parameters must be pointer-size-aligned + ASSERT(IS_ALIGNED(dest, sizeof(size_t))); + ASSERT(IS_ALIGNED(src, sizeof(size_t))); + ASSERT(IS_ALIGNED(len, sizeof(size_t))); + + size_t size = len; + UInt8 * dmem = (UInt8 *)dest + len; + UInt8 * smem = (UInt8 *)src + len; + + // regions must be non-overlapping + ASSERT(smem <= dmem || dmem + size <= smem); + + // copy 4 pointers at a time + while (size >= 4 * sizeof(size_t)) + { + size -= 4 * sizeof(size_t); + smem -= 4 * sizeof(size_t); + dmem -= 4 * sizeof(size_t); + ((size_t *)dmem)[3] = ((size_t *)smem)[3]; + ((size_t *)dmem)[2] = ((size_t *)smem)[2]; + ((size_t *)dmem)[1] = ((size_t *)smem)[1]; + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + } + + // copy 2 trailing pointers, if needed + if ((size & (2 * sizeof(size_t))) != 0) + { + smem -= 2 * sizeof(size_t); + dmem -= 2 * sizeof(size_t); + ((size_t *)dmem)[1] = ((size_t *)smem)[1]; + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + } + + // finish with one pointer, if needed + if ((size & sizeof(size_t)) != 0) + { + smem -= sizeof(size_t); + dmem -= sizeof(size_t); + ((size_t *)dmem)[0] = ((size_t *)smem)[0]; + } +} + + +#ifndef DACCESS_COMPILE +#ifdef WRITE_BARRIER_CHECK +extern uint8_t* g_GCShadow; +extern uint8_t* g_GCShadowEnd; +typedef DPTR(uint8_t) PTR_uint8_t; +extern "C" { + GPTR_DECL(uint8_t, g_lowest_address); + GPTR_DECL(uint8_t, g_highest_address); +} +#endif + +typedef DPTR(uint32_t) PTR_uint32_t; +extern "C" { + GPTR_DECL(uint32_t, g_card_table); +} +static const UInt32 INVALIDGCVALUE = 0xcccccccd; + +FORCEINLINE void InlineWriteBarrier(void * dst, void * ref) +{ + if (((uint8_t*)ref >= g_ephemeral_low) && ((uint8_t*)ref < g_ephemeral_high)) + { + // volatile is used here to prevent fetch of g_card_table from being reordered + // with g_lowest/highest_address check above. See comment in code:gc_heap::grow_brick_card_tables. 
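+ // Setting the card byte that covers 'dst' (one card per 2^LOG2_CLUMP_SIZE bytes of heap) tells the next GC to rescan that clump for references into the ephemeral generations; testing for 0xFF first avoids dirtying a card that is already marked.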
+ uint8_t* pCardByte = (uint8_t *)VolatileLoadWithoutBarrier(&g_card_table) + ((size_t)dst >> LOG2_CLUMP_SIZE); + if (*pCardByte != 0xFF) + *pCardByte = 0xFF; + } +} + +FORCEINLINE void InlineCheckedWriteBarrier(void * dst, void * ref) +{ + // if the dst is outside of the heap (unboxed value classes) then we + // simply exit + if (((uint8_t*)dst < g_lowest_address) || ((uint8_t*)dst >= g_highest_address)) + return; + + InlineWriteBarrier(dst, ref); +} + +FORCEINLINE void InlinedBulkWriteBarrier(void* pMemStart, size_t cbMemSize) +{ + // Check whether the writes were even into the heap. If not there's no card update required. + // Also if the size is smaller than a pointer, no write barrier is required. + // This case can occur with universal shared generic code where the size + // is not known at compile time. + if (pMemStart < g_lowest_address || (pMemStart >= g_highest_address) || (cbMemSize < sizeof(UIntNative))) + { + return; + } + +#ifdef WRITE_BARRIER_CHECK + // Perform shadow heap updates corresponding to the gc heap updates that immediately preceded this helper + // call. + + // If g_GCShadow is 0, don't perform the check. + if (g_GCShadow != NULL) + { + // Compute the shadow heap address corresponding to the beginning of the range of heap addresses modified + // and in the process range check it to make sure we have the shadow version allocated. + UIntNative* shadowSlot = (UIntNative*)(g_GCShadow + ((uint8_t*)pMemStart - g_lowest_address)); + if (shadowSlot <= (UIntNative*)g_GCShadowEnd) + { + // Iterate over every pointer sized slot in the range, copying data from the real heap to the shadow heap. + // As we perform each copy we need to recheck the real heap contents with an ordered read to ensure we're + // not racing with another heap updater. If we discover a race we invalidate the corresponding shadow heap + // slot using a special well-known value so that this location will not be tested during the next shadow + // heap validation. + + UIntNative* realSlot = (UIntNative*)pMemStart; + UIntNative slotCount = cbMemSize / sizeof(UIntNative); + do + { + // Update shadow slot from real slot. + UIntNative realValue = *realSlot; + *shadowSlot = realValue; + // Memory barrier to ensure the next read is ordered wrt to the shadow heap write we just made. + PalMemoryBarrier(); + + // Read the real slot contents again. If they don't agree with what we just wrote then someone just raced + // with us and updated the heap again. In such cases we invalidate the shadow slot. + if (*realSlot != realValue) + { + *shadowSlot = INVALIDGCVALUE; + } + + realSlot++; + shadowSlot++; + slotCount--; + } + while (slotCount > 0); + } + } + +#endif // WRITE_BARRIER_CHECK + + // Compute the starting card address and the number of bytes to write (groups of 8 cards). We could try + // for further optimization here using aligned 32-bit writes but there's some overhead in setup required + // and additional complexity. It's not clear this is warranted given that a single byte of card table + // update already covers 1K of object space (2K on 64-bit platforms). It's also not worth probing that + // 1K/2K range to see if any of the pointers appear to be non-ephemeral GC references. Given the size of + // the area the chances are high that at least one interesting GC refenence is present. 
+ + size_t startAddress = (size_t)pMemStart; + size_t endAddress = startAddress + cbMemSize; + size_t startingClump = startAddress >> LOG2_CLUMP_SIZE; + size_t endingClump = (endAddress + CLUMP_SIZE - 1) >> LOG2_CLUMP_SIZE; + + // calculate the number of clumps to mark (round_up(end) - start) + size_t clumpCount = endingClump - startingClump; + // VolatileLoadWithoutBarrier() is used here to prevent fetch of g_card_table from being reordered + // with g_lowest/highest_address check at the beginning of this function. + uint8_t* card = ((uint8_t*)VolatileLoadWithoutBarrier(&g_card_table)) + startingClump; + + // Fill the cards. To avoid cache line thrashing we check whether the cards have already been set before + // writing. + do + { + if (*card != 0xff) + { + *card = 0xff; + } + + card++; + clumpCount--; + } + while (clumpCount != 0); +} +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/GcStressControl.cpp b/src/coreclr/src/nativeaot/Runtime/GcStressControl.cpp new file mode 100644 index 0000000000000..5296d30971c29 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GcStressControl.cpp @@ -0,0 +1,182 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" + +#if defined(FEATURE_GC_STRESS) & !defined(DACCESS_COMPILE) + + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "holder.h" +#include "Crst.h" +#include "RhConfig.h" +#include "gcrhinterface.h" +#include "slist.h" +#include "varint.h" +#include "regdisplay.h" +#include "forward_declarations.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "event.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "shash.h" +#include "shash.inl" +#include "GcStressControl.h" + + +class GcStressControl +{ +public: + static bool ShouldHijack(UIntNative CallsiteIP, HijackType ht) + { + if (s_initState != isInited) + Initialize(); + + // don't hijack for GC stress if we're in a "no GC stress" region + Thread * pCurrentThread = ThreadStore::GetCurrentThread(); + if (pCurrentThread->IsSuppressGcStressSet()) + return false; + + if (g_pRhConfig->GetGcStressThrottleMode() == 0) + { + return true; + } + if (g_pRhConfig->GetGcStressThrottleMode() & gcstm_TriggerRandom) + { + if (GcStressTriggerRandom(CallsiteIP, ht, pCurrentThread)) + return true; + } + if (g_pRhConfig->GetGcStressThrottleMode() & gcstm_TriggerOnFirstHit) + { + if (GcStressTriggerFirstHit(CallsiteIP, ht)) + return true; + } + return false; + } + +private: + enum InitState { isNotInited, isIniting, isInited }; + + static void Initialize() + { + volatile InitState is = (InitState) PalInterlockedCompareExchange((volatile Int32*)(&s_initState), isIniting, isNotInited); + if (is == isNotInited) + { + s_lock.InitNoThrow(CrstGcStressControl); + + if (g_pRhConfig->GetGcStressSeed()) + s_lGcStressRNGSeed = g_pRhConfig->GetGcStressSeed(); + else + s_lGcStressRNGSeed = PalGetTickCount(); + + if (g_pRhConfig->GetGcStressFreqDenom()) + s_lGcStressFreqDenom = g_pRhConfig->GetGcStressFreqDenom(); + else + s_lGcStressFreqDenom = 10000; + + s_initState = isInited; + } + else + { + while (s_initState != isInited) + ; + } + } + + // returns true if no entry was found for CallsiteIP, false otherwise + static bool GcStressTrackAtIP(UIntNative CallsiteIP, HijackType ht, bool bForceGC) + { + // do this under a lock, as the 
underlying SHash might be "grown" by + // operations on other threads + + CrstHolder lh(&s_lock); + + const CallsiteCountEntry * pEntry = s_callsites.LookupPtr(CallsiteIP); + size_t hits; + + if (pEntry == NULL) + { + hits = 1; + CallsiteCountEntry e = {CallsiteIP, 1, 1, ht}; + s_callsites.AddOrReplace(e); + } + else + { + hits = ++(const_cast(pEntry)->countHit); + if (bForceGC) + { + ++(const_cast(pEntry)->countForced); + } + } + + return pEntry == NULL; + } + + static bool GcStressTriggerFirstHit(UIntNative CallsiteIP, HijackType ht) + { + return GcStressTrackAtIP(CallsiteIP, ht, false); + } + + static UInt32 GcStressRNG(UInt32 uMaxValue, Thread *pCurrentThread) + { + if (!pCurrentThread->IsRandInited()) + { + pCurrentThread->SetRandomSeed(s_lGcStressRNGSeed); + } + + return pCurrentThread->NextRand() % uMaxValue; + } + + static bool GcStressTriggerRandom(UIntNative CallsiteIP, HijackType ht, Thread *pCurrentThread) + { + bool bRes = false; + if (ht == htLoop) + { + bRes = GcStressRNG(s_lGcStressFreqDenom , pCurrentThread) < g_pRhConfig->GetGcStressFreqLoop(); + } + else if (ht == htCallsite) + { + bRes = GcStressRNG(s_lGcStressFreqDenom , pCurrentThread) < g_pRhConfig->GetGcStressFreqCallsite(); + } + if (bRes) + { + // if we're about to trigger a GC, track this in s_callsites + GcStressTrackAtIP(CallsiteIP, ht, true); + } + return bRes; + } + +private: + static CrstStatic s_lock; + static UInt32 s_lGcStressRNGSeed; + static UInt32 s_lGcStressFreqDenom; + static volatile InitState s_initState; + +public: + static CallsiteCountSHash s_callsites; // exposed to the DAC +}; + +// public interface: + +CallsiteCountSHash GcStressControl::s_callsites; +CrstStatic GcStressControl::s_lock; +UInt32 GcStressControl::s_lGcStressRNGSeed = 0; +UInt32 GcStressControl::s_lGcStressFreqDenom = 0; +volatile GcStressControl::InitState GcStressControl::s_initState = GcStressControl::isNotInited; + +GPTR_IMPL_INIT(CallsiteCountSHash, g_pCallsites, &GcStressControl::s_callsites); + +bool ShouldHijackForGcStress(UIntNative CallsiteIP, HijackType ht) +{ + return GcStressControl::ShouldHijack(CallsiteIP, ht); +} + +#endif // FEATURE_GC_STRESS & !DACCESS_COMPILE + + diff --git a/src/coreclr/src/nativeaot/Runtime/GcStressControl.h b/src/coreclr/src/nativeaot/Runtime/GcStressControl.h new file mode 100644 index 0000000000000..3d564d7a6ef0f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/GcStressControl.h @@ -0,0 +1,51 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
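+
+// GC stress hijack throttling: GcStressThrottleMode below is a bit mask. A value of 0 (gcstm_TriggerAlways)
+// triggers a GC stress hijack at every GC safe point, while gcstm_TriggerOnFirstHit and gcstm_TriggerRandom restrict
+// hijacking to the first hit at a callsite or to a random subset of hits (see GcStressControl::ShouldHijack).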
+#ifndef __GcStressControl_h__ +#define __GcStressControl_h__ + + +enum HijackType { htLoop, htCallsite }; +bool ShouldHijackForGcStress(UIntNative CallsiteIP, HijackType ht); + + +enum GcStressThrottleMode { + gcstm_TriggerAlways = 0x0000, // trigger a GC every time we hit a GC safe point + gcstm_TriggerOnFirstHit = 0x0001, // trigger a GC the first time a GC safe point is hit + gcstm_TriggerRandom = 0x0002, // trigger a GC randomly, as defined by GcStressFreqCallsite/GcStressFreqLoop/GcStressSeed +}; + +struct CallsiteCountEntry +{ + UIntNative callsiteIP; + UIntNative countHit; + UIntNative countForced; + HijackType ht; +}; + +typedef DPTR(CallsiteCountEntry) PTR_CallsiteCountEntry; + +class CallsiteCountTraits: public NoRemoveSHashTraits< DefaultSHashTraits < CallsiteCountEntry > > +{ +public: + typedef UIntNative key_t; + + static UIntNative GetKey(const CallsiteCountEntry & e) { return e.callsiteIP; } + + static count_t Hash(UIntNative k) + { return (count_t) k; } + + static bool Equals(UIntNative k1, UIntNative k2) + { return k1 == k2; } + + static CallsiteCountEntry Null() + { CallsiteCountEntry e; e.callsiteIP = 0; return e; } + + static bool IsNull(const CallsiteCountEntry & e) + { return e.callsiteIP == 0; } +}; + +typedef SHash < CallsiteCountTraits > CallsiteCountSHash; +typedef DPTR(CallsiteCountSHash) PTR_CallsiteCountSHash; + + +#endif // __GcStressControl_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/HandleTableHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/HandleTableHelpers.cpp new file mode 100644 index 0000000000000..83cfe0e0c1431 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/HandleTableHelpers.cpp @@ -0,0 +1,83 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Helper functions that are p/invoked from redhawkm in order to expose handle table functionality to managed +// code. These p/invokes are special in that the handle table code requires we remain in co-operative mode +// (since these routines mutate the handle tables which are also accessed during garbage collections). The +// binder has special knowledge of these methods and doesn't generate the normal code to transition out of the +// runtime prior to the call. +// +#include "common.h" +#include "gcenv.h" +#include "objecthandle.h" +#include "RestrictedCallouts.h" +#include "gchandleutilities.h" + + +COOP_PINVOKE_HELPER(OBJECTHANDLE, RhpHandleAlloc, (Object *pObject, int type)) +{ + return GCHandleUtilities::GetGCHandleManager()->GetGlobalHandleStore()->CreateHandleOfType(pObject, (HandleType)type); +} + +COOP_PINVOKE_HELPER(OBJECTHANDLE, RhpHandleAllocDependent, (Object *pPrimary, Object *pSecondary)) +{ + return GCHandleUtilities::GetGCHandleManager()->GetGlobalHandleStore()->CreateDependentHandle(pPrimary, pSecondary); +} + +COOP_PINVOKE_HELPER(void, RhHandleFree, (OBJECTHANDLE handle)) +{ + GCHandleUtilities::GetGCHandleManager()->DestroyHandleOfUnknownType(handle); +} + +COOP_PINVOKE_HELPER(Object *, RhHandleGet, (OBJECTHANDLE handle)) +{ + return ObjectFromHandle(handle); +} + +COOP_PINVOKE_HELPER(Object *, RhHandleGetDependent, (OBJECTHANDLE handle, Object **ppSecondary)) +{ + Object *pPrimary = ObjectFromHandle(handle); + *ppSecondary = (pPrimary != NULL) ? 
GetDependentHandleSecondary(handle) : NULL; + return pPrimary; +} + +COOP_PINVOKE_HELPER(void, RhHandleSetDependentSecondary, (OBJECTHANDLE handle, Object *pSecondary)) +{ + SetDependentHandleSecondary(handle, pSecondary); +} + +COOP_PINVOKE_HELPER(void, RhHandleSet, (OBJECTHANDLE handle, Object *pObject)) +{ + GCHandleUtilities::GetGCHandleManager()->StoreObjectInHandle(handle, pObject); +} + +COOP_PINVOKE_HELPER(Boolean, RhRegisterRefCountedHandleCallback, (void * pCallout, EEType * pTypeFilter)) +{ + return RestrictedCallouts::RegisterRefCountedHandleCallback(pCallout, pTypeFilter); +} + +COOP_PINVOKE_HELPER(void, RhUnregisterRefCountedHandleCallback, (void * pCallout, EEType * pTypeFilter)) +{ + RestrictedCallouts::UnregisterRefCountedHandleCallback(pCallout, pTypeFilter); +} + +COOP_PINVOKE_HELPER(OBJECTHANDLE, RhpHandleAllocVariable, (Object * pObject, UInt32 type)) +{ + return GCHandleUtilities::GetGCHandleManager()->GetGlobalHandleStore()->CreateHandleWithExtraInfo(pObject, HNDTYPE_VARIABLE, (void*)((uintptr_t)type)); +} + +COOP_PINVOKE_HELPER(UInt32, RhHandleGetVariableType, (OBJECTHANDLE handle)) +{ + return GetVariableHandleType(handle); +} + +COOP_PINVOKE_HELPER(void, RhHandleSetVariableType, (OBJECTHANDLE handle, UInt32 type)) +{ + UpdateVariableHandleType(handle, type); +} + +COOP_PINVOKE_HELPER(UInt32, RhHandleCompareExchangeVariableType, (OBJECTHANDLE handle, UInt32 oldType, UInt32 newType)) +{ + return CompareExchangeVariableHandleType(handle, oldType, newType); +} diff --git a/src/coreclr/src/nativeaot/Runtime/ICodeManager.h b/src/coreclr/src/nativeaot/Runtime/ICodeManager.h new file mode 100644 index 0000000000000..2c92eea53cfd2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/ICodeManager.h @@ -0,0 +1,165 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
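// Illustrative sketch, not part of the original header: the ARM64 composite GCRefKind
// values declared later in this file pack one two-bit kind per return register. The local
// mirror below (kScalar/kObject/kByref, Pack, UnpackReg0, UnpackReg1) is hypothetical and
// only demonstrates the packing arithmetic; the #if 0 guard keeps it out of any build.
#if 0
#include <cstdint>

constexpr uint8_t kScalar = 0x0, kObject = 0x1, kByref = 0x2;

constexpr uint8_t Pack(uint8_t reg0, uint8_t reg1) { return (uint8_t)((reg1 << 2) | reg0); }
constexpr uint8_t UnpackReg0(uint8_t composite)    { return (uint8_t)(composite & 0x3); }
constexpr uint8_t UnpackReg1(uint8_t composite)    { return (uint8_t)(composite >> 2); }

// GCRK_Scalar_Obj means the first return register holds a scalar and the second holds an
// object reference, matching ExtractReg0ReturnKind/ExtractReg1ReturnKind declared below.
static_assert(Pack(kScalar, kObject) == 0x4, "two bits per register");
static_assert(UnpackReg0(0x4) == kScalar && UnpackReg1(0x4) == kObject, "round trip");
#endif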
+#pragma once + +#define ICODEMANAGER_INCLUDED + +// TODO: Debugger/DAC support (look for TODO: JIT) + +struct REGDISPLAY; + +#define GC_CALL_INTERIOR 0x1 +#define GC_CALL_PINNED 0x2 +#define GC_CALL_CHECK_APP_DOMAIN 0x4 +#define GC_CALL_STATIC 0x8 + +typedef void (*GCEnumCallback)( + void * hCallback, // callback data + PTR_PTR_VOID pObject, // address of object-reference we are reporting + UInt32 flags // is this a pinned and/or interior pointer +); + +struct GCEnumContext +{ + GCEnumCallback pCallback; +}; + +// All values but GCRK_Unknown must correspond to MethodReturnKind enumeration in gcinfo.h +enum GCRefKind : unsigned char +{ + GCRK_Scalar = 0x00, + GCRK_Object = 0x01, + GCRK_Byref = 0x02, +#ifdef TARGET_ARM64 + // Composite return kinds for value types returned in two registers (encoded with two bits per register) + GCRK_Scalar_Obj = (GCRK_Object << 2) | GCRK_Scalar, + GCRK_Obj_Obj = (GCRK_Object << 2) | GCRK_Object, + GCRK_Byref_Obj = (GCRK_Object << 2) | GCRK_Byref, + GCRK_Scalar_Byref = (GCRK_Byref << 2) | GCRK_Scalar, + GCRK_Obj_Byref = (GCRK_Byref << 2) | GCRK_Object, + GCRK_Byref_Byref = (GCRK_Byref << 2) | GCRK_Byref, + + GCRK_LastValid = GCRK_Byref_Byref, +#else // TARGET_ARM64 + GCRK_LastValid = GCRK_Byref, +#endif // TARGET_ARM64 + GCRK_Unknown = 0xFF, +}; + +#ifdef TARGET_ARM64 +// Extract individual GCRefKind components from a composite return kind +inline GCRefKind ExtractReg0ReturnKind(GCRefKind returnKind) +{ + ASSERT(returnKind <= GCRK_LastValid); + return (GCRefKind)(returnKind & (GCRK_Object | GCRK_Byref)); +} + +inline GCRefKind ExtractReg1ReturnKind(GCRefKind returnKind) +{ + ASSERT(returnKind <= GCRK_LastValid); + return (GCRefKind)(returnKind >> 2); +} +#endif // TARGET_ARM64 + +// +// MethodInfo is placeholder type used to allocate space for MethodInfo. Maximum size +// of the actual method should be less or equal to the placeholder size. +// It avoids memory allocation during stackwalk. +// +class MethodInfo +{ + TADDR dummyPtrs[5]; + Int32 dummyInts[8]; +}; + +class EHEnumState +{ + TADDR dummyPtrs[2]; + Int32 dummyInts[2]; +}; + +enum EHClauseKind +{ + EH_CLAUSE_TYPED = 0, + EH_CLAUSE_FAULT = 1, + EH_CLAUSE_FILTER = 2, + EH_CLAUSE_UNUSED = 3, +}; + +struct EHClause +{ + EHClauseKind m_clauseKind; + UInt32 m_tryStartOffset; + UInt32 m_tryEndOffset; + UInt8* m_filterAddress; + UInt8* m_handlerAddress; + void* m_pTargetType; +}; + +// Note: make sure you change the def in System\Runtime\InternalCalls.cs if you change this! 
+enum class ClasslibFunctionId +{ + GetRuntimeException = 0, + FailFast = 1, + UnhandledExceptionHandler = 2, + AppendExceptionStackFrame = 3, + CheckStaticClassConstruction = 4, + GetSystemArrayEEType = 5, + OnFirstChanceException = 6, + DebugFuncEvalHelper = 7, + DebugFuncEvalAbortHelper = 8, +}; + +enum class AssociatedDataFlags : unsigned char +{ + None = 0, + HasUnboxingStubTarget = 1, +}; + +class ICodeManager +{ +public: + virtual bool FindMethodInfo(PTR_VOID ControlPC, + MethodInfo * pMethodInfoOut) = 0; + + virtual bool IsFunclet(MethodInfo * pMethodInfo) = 0; + + virtual PTR_VOID GetFramePointer(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet) = 0; + + virtual void EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback) = 0; + + virtual bool UnwindStackFrame(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in/out + PTR_VOID * ppPreviousTransitionFrame) = 0; // out + + virtual UIntNative GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet) = 0; + + virtual bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in + PTR_PTR_VOID * ppvRetAddrLocation, // out + GCRefKind * pRetValueKind) = 0; // out + + virtual void UnsynchronizedHijackMethodLoops(MethodInfo * pMethodInfo) = 0; + + virtual PTR_VOID RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC) = 0; + + virtual bool EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumState) = 0; + + virtual bool EHEnumNext(EHEnumState * pEHEnumState, EHClause * pEHClause) = 0; + + virtual PTR_VOID GetMethodStartAddress(MethodInfo * pMethodInfo) = 0; + + virtual PTR_VOID GetOsModuleHandle() = 0; + + virtual void * GetClasslibFunction(ClasslibFunctionId functionId) = 0; + + // Returns any custom data attached to the method. Format: + // AssociatedDataFlags // 1 byte. Flags describing the data stored + // Data (stream of bytes) // Variable size (depending on flags). Custom data associated with method + virtual PTR_VOID GetAssociatedData(PTR_VOID ControlPC) = 0; +}; diff --git a/src/coreclr/src/nativeaot/Runtime/MathHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/MathHelpers.cpp new file mode 100644 index 0000000000000..09fae4a18c15e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/MathHelpers.cpp @@ -0,0 +1,179 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "rhassert.h" + +// +// Floating point and 64-bit integer math helpers. +// + +EXTERN_C REDHAWK_API UInt64 REDHAWK_CALLCONV RhpDbl2ULng(double val) +{ + return((UInt64)val); +} + +#undef min +#undef max +#include + +EXTERN_C REDHAWK_API float REDHAWK_CALLCONV RhpFltRem(float dividend, float divisor) +{ + // + // From the ECMA standard: + // + // If [divisor] is zero or [dividend] is infinity + // the result is NaN. + // If [divisor] is infinity, + // the result is [dividend] (negated for -infinity***). + // + // ***"negated for -infinity" has been removed from the spec + // + + if (divisor==0 || !std::isfinite(dividend)) + { + return -nanf(0); + } + else if (!std::isfinite(divisor) && !std::isnan(divisor)) + { + return dividend; + } + // else... 
+ return fmodf(dividend,divisor); +} + +EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpDblRem(double dividend, double divisor) +{ + // + // From the ECMA standard: + // + // If [divisor] is zero or [dividend] is infinity + // the result is NaN. + // If [divisor] is infinity, + // the result is [dividend] (negated for -infinity***). + // + // ***"negated for -infinity" has been removed from the spec + // + if (divisor==0 || !std::isfinite(dividend)) + { + return -nan(0); + } + else if (!std::isfinite(divisor) && !std::isnan(divisor)) + { + return dividend; + } + // else... + return(fmod(dividend,divisor)); +} + +EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpDblRound(double value) +{ + return round(value); +} + +EXTERN_C REDHAWK_API float REDHAWK_CALLCONV RhpFltRound(float value) +{ + return roundf(value); +} + +#ifdef HOST_ARM +EXTERN_C REDHAWK_API Int32 REDHAWK_CALLCONV RhpIDiv(Int32 i, Int32 j) +{ + ASSERT(j && "Divide by zero!"); + return i / j; +} + +EXTERN_C REDHAWK_API UInt32 REDHAWK_CALLCONV RhpUDiv(UInt32 i, UInt32 j) +{ + ASSERT(j && "Divide by zero!"); + return i / j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpLDiv(Int64 i, Int64 j) +{ + ASSERT(j && "Divide by zero!"); + return i / j; +} + +EXTERN_C REDHAWK_API UInt64 REDHAWK_CALLCONV RhpULDiv(UInt64 i, UInt64 j) +{ + ASSERT(j && "Divide by zero!"); + return i / j; +} + +EXTERN_C REDHAWK_API Int32 REDHAWK_CALLCONV RhpIMod(Int32 i, Int32 j) +{ + ASSERT(j && "Divide by zero!"); + return i % j; +} + +EXTERN_C REDHAWK_API UInt32 REDHAWK_CALLCONV RhpUMod(UInt32 i, UInt32 j) +{ + ASSERT(j && "Divide by zero!"); + return i % j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpLMod(Int64 i, Int64 j) +{ + ASSERT(j && "Divide by zero!"); + return i % j; +} + +EXTERN_C REDHAWK_API UInt64 REDHAWK_CALLCONV RhpULMod(UInt64 i, UInt64 j) +{ + ASSERT(j && "Divide by zero!"); + return i % j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpLMul(Int64 i, Int64 j) +{ + return i * j; +} + +EXTERN_C REDHAWK_API UInt64 REDHAWK_CALLCONV RhpULMul(UInt64 i, UInt64 j) +{ + return i * j; +} + +EXTERN_C REDHAWK_API UInt64 REDHAWK_CALLCONV RhpLRsz(UInt64 i, Int32 j) +{ + return i >> j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpLRsh(Int64 i, Int32 j) +{ + return i >> j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpLLsh(Int64 i, Int32 j) +{ + return i << j; +} + +EXTERN_C REDHAWK_API Int64 REDHAWK_CALLCONV RhpDbl2Lng(double val) +{ + return (Int64)val; +} + +EXTERN_C REDHAWK_API Int32 REDHAWK_CALLCONV RhpDbl2Int(double val) +{ + return (Int32)val; +} + +EXTERN_C REDHAWK_API UInt32 REDHAWK_CALLCONV RhpDbl2UInt(double val) +{ + return (UInt32)val; +} + +EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpLng2Dbl(Int64 val) +{ + return (double)val; +} + +EXTERN_C REDHAWK_API double REDHAWK_CALLCONV RhpULng2Dbl(UInt64 val) +{ + return (double)val; +} + +#endif // HOST_ARM diff --git a/src/coreclr/src/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/MiscHelpers.cpp new file mode 100644 index 0000000000000..3b90f0f6d0f35 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/MiscHelpers.cpp @@ -0,0 +1,541 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Miscellaneous unmanaged helpers called by managed code. 
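// Illustrative note, not part of the original sources: the ECMA remainder rules implemented
// by RhpFltRem/RhpDblRem in MathHelpers.cpp above reduce to three cases. SketchDblRemCases
// and its sample values are hypothetical; the #if 0 guard keeps the sketch out of any build.
#if 0
#include <cmath>
#include <limits>

static void SketchDblRemCases()
{
    const double inf = std::numeric_limits<double>::infinity();
    double r1 = RhpDblRem(1.0, 0.0);  // zero divisor      -> NaN
    double r2 = RhpDblRem(inf, 2.0);  // infinite dividend -> NaN
    double r3 = RhpDblRem(5.0, inf);  // infinite divisor  -> the dividend, 5.0
    double r4 = RhpDblRem(5.5, 2.0);  // ordinary case     -> fmod(5.5, 2.0) == 1.5
    (void)r1; (void)r2; (void)r3; (void)r4;
}
#endif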
+// + +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "rhbinder.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "regdisplay.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "event.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" +#include "gcrhinterface.h" +#include "shash.h" +#include "TypeManager.h" +#include "eetype.h" +#include "ObjectLayout.h" +#include "slist.inl" +#include "eetype.inl" +#include "CommonMacros.inl" +#include "volatile.h" +#include "GCMemoryHelpers.h" +#include "GCMemoryHelpers.inl" +#include "yieldprocessornormalized.h" + +COOP_PINVOKE_HELPER(void, RhDebugBreak, ()) +{ + PalDebugBreak(); +} + +// Busy spin for the given number of iterations. +COOP_PINVOKE_HELPER(void, RhSpinWait, (Int32 iterations)) +{ + YieldProcessorNormalizationInfo normalizationInfo; + YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, iterations); +} + +// Yield the cpu to another thread ready to process, if one is available. +EXTERN_C REDHAWK_API UInt32_BOOL __cdecl RhYield() +{ + // This must be called via p/invoke -- it's a wait operation and we don't want to block thread suspension on this. + ASSERT_MSG(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode(), + "You must p/invoke to RhYield"); + + return PalSwitchToThread(); +} + +EXTERN_C REDHAWK_API void __cdecl RhFlushProcessWriteBuffers() +{ + // This must be called via p/invoke -- it's a wait operation and we don't want to block thread suspension on this. + ASSERT_MSG(!ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode(), + "You must p/invoke to RhFlushProcessWriteBuffers"); + + PalFlushProcessWriteBuffers(); +} + +// Get the list of currently loaded Redhawk modules (as OS HMODULE handles). The caller provides a reference +// to an array of pointer-sized elements and we return the total number of modules currently loaded (whether +// that is less than, equal to or greater than the number of elements in the array). If there are more modules +// loaded than the array will hold then the array is filled to capacity and the caller can tell further +// modules are available based on the return count. It is also possible to call this method without an array, +// in which case just the module count is returned (note that it's still possible for the module count to +// increase between calls to this method). +COOP_PINVOKE_HELPER(UInt32, RhGetLoadedOSModules, (Array * pResultArray)) +{ + // Note that we depend on the fact that this is a COOP helper to make writing into an unpinned array safe. + + // If a result array is passed then it should be an array type with pointer-sized components that are not + // GC-references. + ASSERT(!pResultArray || pResultArray->get_EEType()->IsArray()); + ASSERT(!pResultArray || !pResultArray->get_EEType()->HasReferenceFields()); + ASSERT(!pResultArray || pResultArray->get_EEType()->get_ComponentSize() == sizeof(void*)); + + UInt32 cResultArrayElements = pResultArray ? pResultArray->GetArrayLength() : 0; + HANDLE * pResultElements = pResultArray ? 
(HANDLE*)(pResultArray + 1) : NULL; + + UInt32 cModules = 0; + + ReaderWriterLock::ReadHolder read(&GetRuntimeInstance()->GetTypeManagerLock()); + + RuntimeInstance::OsModuleList *osModules = GetRuntimeInstance()->GetOsModuleList(); + + for (RuntimeInstance::OsModuleList::Iterator iter = osModules->Begin(); iter != osModules->End(); iter++) + { + if (pResultArray && (cModules < cResultArrayElements)) + pResultElements[cModules] = iter->m_osModule; + cModules++; + } + + return cModules; +} + +COOP_PINVOKE_HELPER(HANDLE, RhGetOSModuleFromPointer, (PTR_VOID pPointerVal)) +{ + ICodeManager * pCodeManager = GetRuntimeInstance()->FindCodeManagerByAddress(pPointerVal); + + if (pCodeManager != NULL) + return (HANDLE)pCodeManager->GetOsModuleHandle(); + + return NULL; +} + +COOP_PINVOKE_HELPER(HANDLE, RhGetOSModuleFromEEType, (EEType * pEEType)) +{ + return pEEType->GetTypeManagerPtr()->AsTypeManager()->GetOsModuleHandle(); +} + +COOP_PINVOKE_HELPER(TypeManagerHandle, RhGetModuleFromEEType, (EEType * pEEType)) +{ + return *pEEType->GetTypeManagerPtr(); +} + +COOP_PINVOKE_HELPER(Boolean, RhFindBlob, (TypeManagerHandle *pTypeManagerHandle, UInt32 blobId, UInt8 ** ppbBlob, UInt32 * pcbBlob)) +{ + TypeManagerHandle typeManagerHandle = *pTypeManagerHandle; + + ReadyToRunSectionType section = + (ReadyToRunSectionType)((UInt32)ReadyToRunSectionType::ReadonlyBlobRegionStart + blobId); + ASSERT(section <= ReadyToRunSectionType::ReadonlyBlobRegionEnd); + + TypeManager* pModule = typeManagerHandle.AsTypeManager(); + + int length; + void* pBlob; + pBlob = pModule->GetModuleSection(section, &length); + + *ppbBlob = (UInt8*)pBlob; + *pcbBlob = (UInt32)length; + + return pBlob != NULL; +} + +// This helper is not called directly but is used by the implementation of RhpCheckCctor to locate the +// CheckStaticClassConstruction classlib callback. It must not trigger a GC. The return address passed points +// to code in the caller's module and can be used in the lookup. +COOP_PINVOKE_HELPER(void *, GetClasslibCCtorCheck, (void * pReturnAddress)) +{ + // Locate the calling module from the context structure address (which is in writable memory in the + // module image). + ICodeManager * pCodeManager = GetRuntimeInstance()->FindCodeManagerByAddress(pReturnAddress); + ASSERT(pCodeManager); + + // Lookup the callback registered by the classlib. + void * pCallback = pCodeManager->GetClasslibFunction(ClasslibFunctionId::CheckStaticClassConstruction); + + // We have no fallback path if we got here but the classlib doesn't implement the callback. 
+ if (pCallback == NULL) + RhFailFast(); + + return pCallback; +} + +COOP_PINVOKE_HELPER(void *, RhGetTargetOfUnboxingAndInstantiatingStub, (void * pUnboxStub)) +{ + return GetRuntimeInstance()->GetTargetOfUnboxingAndInstantiatingStub(pUnboxStub); +} + +#if TARGET_ARM +//***************************************************************************** +// Extract the 16-bit immediate from ARM Thumb2 Instruction (format T2_N) +//***************************************************************************** +static FORCEINLINE UInt16 GetThumb2Imm16(UInt16 * p) +{ + return ((p[0] << 12) & 0xf000) | + ((p[0] << 1) & 0x0800) | + ((p[1] >> 4) & 0x0700) | + ((p[1] >> 0) & 0x00ff); +} + +//***************************************************************************** +// Extract the 32-bit immediate from movw/movt sequence +//***************************************************************************** +inline UInt32 GetThumb2Mov32(UInt16 * p) +{ + // Make sure we are decoding movw/movt sequence + ASSERT((*(p + 0) & 0xFBF0) == 0xF240); + ASSERT((*(p + 2) & 0xFBF0) == 0xF2C0); + + return (UInt32)GetThumb2Imm16(p) + ((UInt32)GetThumb2Imm16(p + 2) << 16); +} + +//***************************************************************************** +// Extract the 24-bit distance from a B/BL instruction +//***************************************************************************** +inline Int32 GetThumb2BlRel24(UInt16 * p) +{ + UInt16 Opcode0 = p[0]; + UInt16 Opcode1 = p[1]; + + UInt32 S = Opcode0 >> 10; + UInt32 J2 = Opcode1 >> 11; + UInt32 J1 = Opcode1 >> 13; + + Int32 ret = + ((S << 24) & 0x1000000) | + (((J1 ^ S ^ 1) << 23) & 0x0800000) | + (((J2 ^ S ^ 1) << 22) & 0x0400000) | + ((Opcode0 << 12) & 0x03FF000) | + ((Opcode1 << 1) & 0x0000FFE); + + // Sign-extend and return + return (ret << 7) >> 7; +} +#endif // TARGET_ARM + +// Given a pointer to code, find out if this points to an import stub +// or unboxing stub, and if so, return the address that stub jumps to +COOP_PINVOKE_HELPER(UInt8 *, RhGetCodeTarget, (UInt8 * pCodeOrg)) +{ + bool unboxingStub = false; + + // First, check the unboxing stubs regions known by the runtime (if any exist) + if (!GetRuntimeInstance()->IsUnboxingStub(pCodeOrg)) + { + return pCodeOrg; + } + +#ifdef TARGET_AMD64 + UInt8 * pCode = pCodeOrg; + + // is this "add rcx/rdi,8"? + if (pCode[0] == 0x48 && + pCode[1] == 0x83 && +#ifdef UNIX_AMD64_ABI + pCode[2] == 0xc7 && +#else + pCode[2] == 0xc1 && +#endif + pCode[3] == 0x08) + { + // unboxing sequence + unboxingStub = true; + pCode += 4; + } + // is this an indirect jump? + if (pCode[0] == 0xff && pCode[1] == 0x25) + { + // normal import stub - dist to IAT cell is relative to the point *after* the instruction + Int32 distToIatCell = *(Int32 *)&pCode[2]; + UInt8 ** pIatCell = (UInt8 **)(pCode + 6 + distToIatCell); + return *pIatCell; + } + // is this an unboxing stub followed by a relative jump? + else if (unboxingStub && pCode[0] == 0xe9) + { + // relative jump - dist is relative to the point *after* the instruction + Int32 distToTarget = *(Int32 *)&pCode[1]; + UInt8 * target = pCode + 5 + distToTarget; + return target; + } + +#elif TARGET_X86 + UInt8 * pCode = pCodeOrg; + + // is this "add ecx,4"? + if (pCode[0] == 0x83 && pCode[1] == 0xc1 && pCode[2] == 0x04) + { + // unboxing sequence + unboxingStub = true; + pCode += 3; + } + // is this an indirect jump? 
+ if (pCode[0] == 0xff && pCode[1] == 0x25) + { + // normal import stub - address of IAT follows + UInt8 **pIatCell = *(UInt8 ***)&pCode[2]; + return *pIatCell; + } + // is this an unboxing stub followed by a relative jump? + else if (unboxingStub && pCode[0] == 0xe9) + { + // relative jump - dist is relative to the point *after* the instruction + Int32 distToTarget = *(Int32 *)&pCode[1]; + UInt8 * pTarget = pCode + 5 + distToTarget; + return pTarget; + } + +#elif TARGET_ARM + UInt16 * pCode = (UInt16 *)((size_t)pCodeOrg & ~THUMB_CODE); + // is this "adds r0,4"? + if (pCode[0] == 0x3004) + { + // unboxing sequence + unboxingStub = true; + pCode += 1; + } + // is this movw r12,#imm16; movt r12,#imm16; ldr pc,[r12] + // or movw r12,#imm16; movt r12,#imm16; bx r12 + if ((pCode[0] & 0xfbf0) == 0xf240 && (pCode[1] & 0x0f00) == 0x0c00 + && (pCode[2] & 0xfbf0) == 0xf2c0 && (pCode[3] & 0x0f00) == 0x0c00 + && ((pCode[4] == 0xf8dc && pCode[5] == 0xf000) || pCode[4] == 0x4760)) + { + if (pCode[4] == 0xf8dc && pCode[5] == 0xf000) + { + // ldr pc,[r12] + UInt8 **pIatCell = (UInt8 **)GetThumb2Mov32(pCode); + return *pIatCell; + } + else if (pCode[4] == 0x4760) + { + // bx r12 + return (UInt8 *)GetThumb2Mov32(pCode); + } + } + // is this an unboxing stub followed by a relative jump? + else if (unboxingStub && (pCode[0] & 0xf800) == 0xf000 && (pCode[1] & 0xd000) == 0x9000) + { + Int32 distToTarget = GetThumb2BlRel24(pCode); + UInt8 * pTarget = (UInt8 *)(pCode + 2) + distToTarget + THUMB_CODE; + return (UInt8 *)pTarget; + } + +#elif TARGET_ARM64 + UInt32 * pCode = (UInt32 *)pCodeOrg; + // is this "add x0,x0,#8"? + if (pCode[0] == 0x91002000) + { + // unboxing sequence + unboxingStub = true; + pCode++; + } + // is this an indirect jump? + // adrp xip0,#imm21; ldr xip0,[xip0,#imm12]; br xip0 + if ((pCode[0] & 0x9f00001f) == 0x90000010 && + (pCode[1] & 0xffc003ff) == 0xf9400210 && + pCode[2] == 0xd61f0200) + { + // normal import stub - dist to IAT cell is relative to (PC & ~0xfff) + // adrp: imm = SignExtend(immhi:immlo:Zeros(12), 64); + Int64 distToIatCell = (((((Int64)pCode[0] & ~0x1f) << 40) >> 31) | ((pCode[0] >> 17) & 0x3000)); + // ldr: offset = LSL(ZeroExtend(imm12, 64), 3); + distToIatCell += (pCode[1] >> 7) & 0x7ff8; + UInt8 ** pIatCell = (UInt8 **)(((Int64)pCode & ~0xfff) + distToIatCell); + return *pIatCell; + } + // is this an unboxing stub followed by a relative jump? + else if (unboxingStub && (pCode[0] >> 26) == 0x5) + { + // relative jump - dist is relative to the instruction + // offset = SignExtend(imm26:'00', 64); + Int64 distToTarget = ((Int64)pCode[0] << 38) >> 36; + return (UInt8 *)pCode + distToTarget; + } +#else + UNREFERENCED_PARAMETER(unboxingStub); + PORTABILITY_ASSERT("RhGetCodeTarget"); +#endif + + return pCodeOrg; +} + +// +// Return true if the array slice is valid +// +FORCEINLINE bool CheckArraySlice(Array * pArray, Int32 index, Int32 length) +{ + Int32 arrayLength = pArray->GetArrayLength(); + + return (0 <= index) && (index <= arrayLength) && + (0 <= length) && (length <= arrayLength) && + (length <= arrayLength - index); +} + +// +// This function handles all cases of Array.Copy that do not require conversions or casting. It returns false if the copy cannot be performed, leaving +// the handling of the complex cases or throwing appropriate exception to the higher level framework. 
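// Illustrative note, not part of the original comment: CheckArraySlice above compares
// "length <= arrayLength - index" instead of "index + length <= arrayLength" so that the
// arithmetic cannot overflow once index and length are known to be within range. The helper
// names and sample values below are hypothetical; the #if 0 guard keeps them out of any build.
#if 0
#include <cstdint>

static bool NaiveSliceCheck(int32_t arrayLength, int32_t index, int32_t length)
{
    // index + length can overflow a signed 32-bit value, which is undefined behavior.
    return index >= 0 && length >= 0 && index + length <= arrayLength;
}

static bool SubtractionSliceCheck(int32_t arrayLength, int32_t index, int32_t length)
{
    // Mirrors CheckArraySlice: every intermediate value stays within [0, arrayLength].
    return index >= 0 && index <= arrayLength &&
           length >= 0 && length <= arrayLength &&
           length <= arrayLength - index;
}
// With arrayLength == 10, index == 5 and length == INT32_MAX - 2, the naive form overflows,
// while the subtraction form simply rejects the slice because length > arrayLength.
#endif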
+// +COOP_PINVOKE_HELPER(Boolean, RhpArrayCopy, (Array * pSourceArray, Int32 sourceIndex, Array * pDestinationArray, Int32 destinationIndex, Int32 length)) +{ + if (pSourceArray == NULL || pDestinationArray == NULL) + return false; + + EEType* pArrayType = pSourceArray->get_EEType(); + EEType* pDestinationArrayType = pDestinationArray->get_EEType(); + if (pArrayType != pDestinationArrayType) + { + if (!pArrayType->IsEquivalentTo(pDestinationArrayType)) + return false; + } + + size_t componentSize = pArrayType->get_ComponentSize(); + if (componentSize == 0) // Not an array + return false; + + if (!CheckArraySlice(pSourceArray, sourceIndex, length)) + return false; + + if (!CheckArraySlice(pDestinationArray, destinationIndex, length)) + return false; + + if (length == 0) + return true; + + UInt8 * pSourceData = (UInt8 *)pSourceArray->GetArrayData() + sourceIndex * componentSize; + UInt8 * pDestinationData = (UInt8 *)pDestinationArray->GetArrayData() + destinationIndex * componentSize; + size_t size = length * componentSize; + + if (pArrayType->HasReferenceFields()) + { + if (pDestinationData <= pSourceData || pSourceData + size <= pDestinationData) + InlineForwardGCSafeCopy(pDestinationData, pSourceData, size); + else + InlineBackwardGCSafeCopy(pDestinationData, pSourceData, size); + + InlinedBulkWriteBarrier(pDestinationData, size); + } + else + { + memmove(pDestinationData, pSourceData, size); + } + + return true; +} + +// +// This function handles all cases of Array.Clear that do not require conversions. It returns false if the operation cannot be performed, leaving +// the handling of the complex cases or throwing appropriate exception to the higher level framework. It is only allowed to return false for illegal +// calls as the BCL side has fallback for "complex cases" only. +// +COOP_PINVOKE_HELPER(Boolean, RhpArrayClear, (Array * pArray, Int32 index, Int32 length)) +{ + if (pArray == NULL) + return false; + + EEType* pArrayType = pArray->get_EEType(); + + size_t componentSize = pArrayType->get_ComponentSize(); + if (componentSize == 0) // Not an array + return false; + + if (!CheckArraySlice(pArray, index, length)) + return false; + + if (length == 0) + return true; + + InlineGCSafeFillMemory((UInt8 *)pArray->GetArrayData() + index * componentSize, length * componentSize, 0); + + return true; +} + +// Get the universal transition thunk. If the universal transition stub is called through +// the normal PE static linkage model, a jump stub would be used which may interfere with +// the custom calling convention of the universal transition thunk. So instead, a special +// api just for getting the thunk address is needed. +// TODO: On ARM this may still result in a jump stub that trashes R12. Determine if anything +// needs to be done about that when we implement the stub for ARM. 
+extern "C" void RhpUniversalTransition(); +COOP_PINVOKE_HELPER(void*, RhGetUniversalTransitionThunk, ()) +{ + return (void*)RhpUniversalTransition; +} + +extern CrstStatic g_CastCacheLock; + +EXTERN_C REDHAWK_API void __cdecl RhpAcquireCastCacheLock() +{ + g_CastCacheLock.Enter(); +} + +EXTERN_C REDHAWK_API void __cdecl RhpReleaseCastCacheLock() +{ + g_CastCacheLock.Leave(); +} + +extern CrstStatic g_ThunkPoolLock; + +EXTERN_C REDHAWK_API void __cdecl RhpAcquireThunkPoolLock() +{ + g_ThunkPoolLock.Enter(); +} + +EXTERN_C REDHAWK_API void __cdecl RhpReleaseThunkPoolLock() +{ + g_ThunkPoolLock.Leave(); +} + +EXTERN_C Int32 __cdecl RhpCalculateStackTraceWorker(void* pOutputBuffer, UInt32 outputBufferLength); + +EXTERN_C REDHAWK_API Int32 __cdecl RhpGetCurrentThreadStackTrace(void* pOutputBuffer, UInt32 outputBufferLength) +{ + // This must be called via p/invoke rather than RuntimeImport to make the stack crawlable. + + ThreadStore::GetCurrentThread()->SetupHackPInvokeTunnel(); + + return RhpCalculateStackTraceWorker(pOutputBuffer, outputBufferLength); +} + +COOP_PINVOKE_HELPER(void*, RhpRegisterFrozenSegment, (void* pSegmentStart, size_t length)) +{ + return RedhawkGCInterface::RegisterFrozenSegment(pSegmentStart, length); +} + +COOP_PINVOKE_HELPER(void, RhpUnregisterFrozenSegment, (void* pSegmentHandle)) +{ + RedhawkGCInterface::UnregisterFrozenSegment((GcSegmentHandle)pSegmentHandle); +} + +COOP_PINVOKE_HELPER(void*, RhpGetModuleSection, (TypeManagerHandle *pModule, Int32 headerId, Int32* length)) +{ + return pModule->AsTypeManager()->GetModuleSection((ReadyToRunSectionType)headerId, length); +} + +COOP_PINVOKE_HELPER(void, RhGetCurrentThreadStackBounds, (PTR_VOID * ppStackLow, PTR_VOID * ppStackHigh)) +{ + ThreadStore::GetCurrentThread()->GetStackBounds(ppStackLow, ppStackHigh); +} + +#ifdef TARGET_UNIX + +// Function to call when a thread is detached from the runtime +ThreadExitCallback g_threadExitCallback; + +COOP_PINVOKE_HELPER(void, RhSetThreadExitCallback, (void * pCallback)) +{ + g_threadExitCallback = (ThreadExitCallback)pCallback; +} + +#endif // TARGET_UNIX + +COOP_PINVOKE_HELPER(Int32, RhGetProcessCpuCount, ()) +{ + return PalGetProcessCpuCount(); +} diff --git a/src/coreclr/src/nativeaot/Runtime/ObjectLayout.cpp b/src/coreclr/src/nativeaot/Runtime/ObjectLayout.cpp new file mode 100644 index 0000000000000..ce3328c52c9d4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/ObjectLayout.cpp @@ -0,0 +1,69 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Implementations of functions dealing with object layout related types. 
+// +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "rhassert.h" +#include "RedhawkWarnings.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "TargetPtrs.h" +#include "eetype.h" +#include "ObjectLayout.h" + +#ifndef DACCESS_COMPILE +void Object::InitEEType(EEType * pEEType) +{ + ASSERT(NULL == m_pEEType); + m_pEEType = pEEType; +} +#endif + +UInt32 Array::GetArrayLength() +{ + return m_Length; +} + +void* Array::GetArrayData() +{ + UInt8* pData = (UInt8*)this; + pData += (get_EEType()->get_BaseSize() - sizeof(ObjHeader)); + return pData; +} + +#ifndef DACCESS_COMPILE +void Array::InitArrayLength(UInt32 length) +{ + m_Length = length; +} + +void ObjHeader::SetBit(UInt32 uBit) +{ + PalInterlockedOr(&m_uSyncBlockValue, uBit); +} + +void ObjHeader::ClrBit(UInt32 uBit) +{ + PalInterlockedAnd(&m_uSyncBlockValue, ~uBit); +} + +size_t Object::GetSize() +{ + EEType * pEEType = get_EEType(); + + // strings have component size2, all other non-arrays should have 0 + ASSERT(( pEEType->get_ComponentSize() <= 2) || pEEType->IsArray()); + + size_t s = pEEType->get_BaseSize(); + UInt16 componentSize = pEEType->get_ComponentSize(); + if (componentSize > 0) + s += ((Array*)this)->GetArrayLength() * componentSize; + return s; +} + +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/ObjectLayout.h b/src/coreclr/src/nativeaot/Runtime/ObjectLayout.h new file mode 100644 index 0000000000000..c3924a2f2c8c5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/ObjectLayout.h @@ -0,0 +1,129 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Low-level types describing GC object layouts. +// + +// Bits stolen from the sync block index that the GC/HandleTable knows about (currently these are at the same +// positions as the mainline runtime but we can change this below when it becomes apparent how Redhawk will +// handle sync blocks). +#define BIT_SBLK_GC_RESERVE 0x20000000 +#define BIT_SBLK_FINALIZER_RUN 0x40000000 + +// The sync block index header (small structure that immediately precedes every object in the GC heap). Only +// the GC uses this so far, and only to store a couple of bits of information. 
+class ObjHeader +{ +private: +#if defined(HOST_64BIT) + UInt32 m_uAlignpad; +#endif // HOST_64BIT + UInt32 m_uSyncBlockValue; + +public: + UInt32 GetBits() { return m_uSyncBlockValue; } + void SetBit(UInt32 uBit); + void ClrBit(UInt32 uBit); + void SetGCBit() { m_uSyncBlockValue |= BIT_SBLK_GC_RESERVE; } + void ClrGCBit() { m_uSyncBlockValue &= ~BIT_SBLK_GC_RESERVE; } +}; + +//------------------------------------------------------------------------------------------------- +static UIntNative const SYNC_BLOCK_SKEW = sizeof(void *); + +class EEType; +typedef DPTR(class EEType) PTR_EEType; +class MethodTable; + +//------------------------------------------------------------------------------------------------- +class Object +{ + friend class AsmOffsets; + + PTR_EEType m_pEEType; +public: + EEType * get_EEType() const + { return m_pEEType; } + EEType * get_SafeEEType() const + { return dac_cast<PTR_EEType>((dac_cast<TADDR>(m_pEEType)) & ~((UIntNative)3)); } + ObjHeader * GetHeader() { return dac_cast<DPTR(ObjHeader)>(dac_cast<TADDR>(this) - SYNC_BLOCK_SKEW); } +#ifndef DACCESS_COMPILE + void set_EEType(EEType * pEEType) + { m_pEEType = pEEType; } + void InitEEType(EEType * pEEType); + + size_t GetSize(); +#endif + + // + // Adapter methods for GC code so that GC and runtime code can use the same type. + // These methods are deprecated -- only use from existing GC code. + // + MethodTable * RawGetMethodTable() const + { + return (MethodTable*)get_EEType(); + } + MethodTable * GetGCSafeMethodTable() const + { + return (MethodTable *)get_SafeEEType(); + } + void RawSetMethodTable(MethodTable * pMT) + { + m_pEEType = PTR_EEType((EEType *)pMT); + } + ////// End adaptor methods +}; +typedef DPTR(Object) PTR_Object; +typedef DPTR(PTR_Object) PTR_PTR_Object; + +//------------------------------------------------------------------------------------------------- +static UIntNative const MIN_OBJECT_SIZE = (2 * sizeof(void*)) + sizeof(ObjHeader); + +//------------------------------------------------------------------------------------------------- +static UIntNative const REFERENCE_SIZE = sizeof(Object *); + +//------------------------------------------------------------------------------------------------- +class Array : public Object +{ + friend class ArrayBase; + friend class AsmOffsets; + + UInt32 m_Length; +#if defined(HOST_64BIT) + UInt32 m_uAlignpad; +#endif // HOST_64BIT +public: + UInt32 GetArrayLength(); + void InitArrayLength(UInt32 length); + void* GetArrayData(); +}; +typedef DPTR(Array) PTR_Array; + +//------------------------------------------------------------------------------------------------- +class String : public Object +{ + friend class AsmOffsets; + friend class StringConstants; + + UInt32 m_Length; + UInt16 m_FirstChar; +}; +typedef DPTR(String) PTR_String; + +//------------------------------------------------------------------------------------------------- +class StringConstants +{ +public: + static UIntNative const ComponentSize = sizeof(((String*)0)->m_FirstChar); + static UIntNative const BaseSize = sizeof(ObjHeader) + offsetof(String, m_FirstChar) + ComponentSize; +}; + +//------------------------------------------------------------------------------------------------- +static UIntNative const STRING_COMPONENT_SIZE = StringConstants::ComponentSize; + +//------------------------------------------------------------------------------------------------- +static UIntNative const STRING_BASE_SIZE = StringConstants::BaseSize; +
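// Worked illustration, not part of the original header: on a typical 64-bit target the
// declarations above give sizeof(ObjHeader) == 8 (4-byte pad plus 4-byte sync block value),
// offsetof(String, m_FirstChar) == 12 (8-byte EEType pointer plus 4-byte length) and a
// component size of sizeof(UInt16) == 2, so STRING_BASE_SIZE works out to 8 + 12 + 2 = 22.
// The Sketch* types below are hypothetical stand-ins; the #if 0 guard keeps them out of any build.
#if 0
#include <cstddef>
#include <cstdint>

struct SketchObjHeader { uint32_t pad; uint32_t syncBlockValue; };
struct SketchString    { void* pEEType; uint32_t length; uint16_t firstChar; };

static_assert(sizeof(SketchObjHeader) == 8, "object header size on a 64-bit target");
static_assert(offsetof(SketchString, firstChar) == 12, "offset of the first character");
static_assert(sizeof(SketchObjHeader) + offsetof(SketchString, firstChar) + sizeof(uint16_t) == 22,
              "string base size");
#endif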
+//------------------------------------------------------------------------------------------------- +static UIntNative const MAX_STRING_LENGTH = 0x3FFFFFDF; diff --git a/src/coreclr/src/nativeaot/Runtime/OptionalFieldsRuntime.cpp b/src/coreclr/src/nativeaot/Runtime/OptionalFieldsRuntime.cpp new file mode 100644 index 0000000000000..d8b10a35fea2a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/OptionalFieldsRuntime.cpp @@ -0,0 +1,71 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Implementations of methods of OptionalFields which are used only at runtime (i.e. reading field values). +// +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "rhbinder.h" +#include "eetype.h" +#include "ObjectLayout.h" +#include "varint.h" + +// Reads the field type from the current byte of the stream and indicates whether this represents the last +// field. +/*static*/ OptionalFieldTag OptionalFields::DecodeFieldTag(PTR_UInt8 * ppFields, bool *pfLastField) +{ + UInt8 tagByte; + tagByte = **ppFields; + + // The last field has the most significant bit of the byte set. + *pfLastField = (tagByte & 0x80) != 0; + + // The remaining 7 bits encode the field type. + OptionalFieldTag eTag = (OptionalFieldTag)(tagByte & 0x7f); + + // Advance the pointer past the header. + (*ppFields)++; + + return eTag; +} + +// Reads a field value (or the basis for an out-of-line record delta) starting from the first byte after the +// field header. Advances the field location to the start of the next field. +UInt32 OptionalFields::DecodeFieldValue(PTR_UInt8 * ppFields) +{ + // VarInt is used to encode the field value (and updates the field pointer in doing so). + return VarInt::ReadUnsigned(*ppFields); +} + +/*static*/ UInt32 OptionalFields::GetInlineField(OptionalFieldTag eTag, UInt32 uiDefaultValue) +{ + // Point at start of encoding stream. + PTR_UInt8 pFields = dac_cast(this); + + for (;;) + { + // Read field tag, an indication of whether this is the last field and the field value (we always read + // the value, even if the tag is not a match because decoding the value advances the field pointer to + // the next field). + bool fLastField; + OptionalFieldTag eCurrentTag = DecodeFieldTag(&pFields, &fLastField); + UInt32 uiCurrentValue = DecodeFieldValue(&pFields); + + // If we found a tag match return the current value. + if (eCurrentTag == eTag) + return uiCurrentValue; + + // If this was the last field we're done as well. + if (fLastField) + break; + } + + // Reached end of stream without getting a match. Field is not present so return default value. + return uiDefaultValue; +} diff --git a/src/coreclr/src/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/src/nativeaot/Runtime/PalRedhawk.h new file mode 100644 index 0000000000000..7789c05346bfc --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/PalRedhawk.h @@ -0,0 +1,857 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Provides declarations for external resources consumed by Redhawk. This comprises functionality +// normally exported from Win32 libraries such as KERNEL32 and MSVCRT. 
When hosted on Win32 calls to these +// functions become simple pass throughs to the native implementation via export forwarding entries in a PAL +// (Platform Abstraction Layer) library. On other platforms the PAL library has actual code to emulate the +// functionality of these same APIs. +// +// In order to make it both obvious and intentional where Redhawk consumes an external API, such functions are +// decorated with an 'Pal' prefix. Ideally the associated supporting types, constants etc. would be +// similarly isolated from their concrete Win32 definitions, making the extent of platform dependence within +// the core explicit. For now that is too big a work item and we'll settle for manually restricting the use of +// external header files to within this header. +// + +#include +#include +#include "gcenv.structs.h" + +#ifndef PAL_REDHAWK_INCLUDED +#define PAL_REDHAWK_INCLUDED + +/* Adapted from intrin.h - For compatibility with , some intrinsics are __cdecl except on x64 */ +#if defined (_M_X64) +#define __PN__MACHINECALL_CDECL_OR_DEFAULT +#else +#define __PN__MACHINECALL_CDECL_OR_DEFAULT __cdecl +#endif + +#ifndef _INC_WINDOWS +//#ifndef DACCESS_COMPILE + +// There are some fairly primitive type definitions below but don't pull them into the rest of Redhawk unless +// we have to (in which case these definitions will move to CommonTypes.h). +typedef WCHAR * LPWSTR; +typedef const WCHAR * LPCWSTR; +typedef char * LPSTR; +typedef const char * LPCSTR; +typedef void * HINSTANCE; + +typedef void * LPSECURITY_ATTRIBUTES; +typedef void * LPOVERLAPPED; + +#ifndef __GCENV_BASE_INCLUDED__ +#define CALLBACK __stdcall +#define WINAPI __stdcall +#define WINBASEAPI __declspec(dllimport) +#endif //!__GCENV_BASE_INCLUDED__ + +#ifdef TARGET_UNIX +#define DIRECTORY_SEPARATOR_CHAR '/' +#else // TARGET_UNIX +#define DIRECTORY_SEPARATOR_CHAR '\\' +#endif // TARGET_UNIX + +typedef union _LARGE_INTEGER { + struct { +#if BIGENDIAN + int32_t HighPart; + uint32_t LowPart; +#else + uint32_t LowPart; + int32_t HighPart; +#endif + } u; + int64_t QuadPart; +} LARGE_INTEGER, *PLARGE_INTEGER; + +typedef struct _GUID { + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + uint8_t Data4[8]; +} GUID; + +#define DECLARE_HANDLE(_name) typedef HANDLE _name + +struct SYSTEM_INFO +{ + union + { + UInt32 dwOemId; + struct { + UInt16 wProcessorArchitecture; + UInt16 wReserved; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + UInt32 dwPageSize; + void * lpMinimumApplicationAddress; + void * lpMaximumApplicationAddress; + UIntNative dwActiveProcessorMask; + UInt32 dwNumberOfProcessors; + UInt32 dwProcessorType; + UInt32 dwAllocationGranularity; + UInt16 wProcessorLevel; + UInt16 wProcessorRevision; +}; + +// defined in gcrhenv.cpp +bool __SwitchToThread(uint32_t dwSleepMSec, uint32_t dwSwitchCount); + +struct FILETIME +{ + UInt32 dwLowDateTime; + UInt32 dwHighDateTime; +}; + +enum MEMORY_RESOURCE_NOTIFICATION_TYPE +{ + LowMemoryResourceNotification, + HighMemoryResourceNotification +}; + +enum LOGICAL_PROCESSOR_RELATIONSHIP +{ + RelationProcessorCore, + RelationNumaNode, + RelationCache, + RelationProcessorPackage +}; + +#define LTP_PC_SMT 0x1 + +enum PROCESSOR_CACHE_TYPE +{ + CacheUnified, + CacheInstruction, + CacheData, + CacheTrace +}; + +struct CACHE_DESCRIPTOR +{ + UInt8 Level; + UInt8 Associativity; + UInt16 LineSize; + UInt32 Size; + PROCESSOR_CACHE_TYPE Type; +}; + +struct SYSTEM_LOGICAL_PROCESSOR_INFORMATION +{ + UIntNative ProcessorMask; + LOGICAL_PROCESSOR_RELATIONSHIP Relationship; + union + { + struct + { + UInt8 
Flags; + } ProcessorCore; + struct + { + UInt32 NodeNumber; + } NumaNode; + CACHE_DESCRIPTOR Cache; + UInt64 Reserved[2]; + }; +}; + +#ifdef HOST_AMD64 + +typedef struct DECLSPEC_ALIGN(16) _XSAVE_FORMAT { + UInt16 ControlWord; + UInt16 StatusWord; + UInt8 TagWord; + UInt8 Reserved1; + UInt16 ErrorOpcode; + UInt32 ErrorOffset; + UInt16 ErrorSelector; + UInt16 Reserved2; + UInt32 DataOffset; + UInt16 DataSelector; + UInt16 Reserved3; + UInt32 MxCsr; + UInt32 MxCsr_Mask; + Fp128 FloatRegisters[8]; +#if defined(HOST_64BIT) + Fp128 XmmRegisters[16]; + UInt8 Reserved4[96]; +#else + Fp128 XmmRegisters[8]; + UInt8 Reserved4[220]; + UInt32 Cr0NpxState; +#endif +} XSAVE_FORMAT, *PXSAVE_FORMAT; + + +typedef XSAVE_FORMAT XMM_SAVE_AREA32, *PXMM_SAVE_AREA32; + +typedef struct DECLSPEC_ALIGN(16) _CONTEXT { + UInt64 P1Home; + UInt64 P2Home; + UInt64 P3Home; + UInt64 P4Home; + UInt64 P5Home; + UInt64 P6Home; + UInt32 ContextFlags; + UInt32 MxCsr; + UInt16 SegCs; + UInt16 SegDs; + UInt16 SegEs; + UInt16 SegFs; + UInt16 SegGs; + UInt16 SegSs; + UInt32 EFlags; + UInt64 Dr0; + UInt64 Dr1; + UInt64 Dr2; + UInt64 Dr3; + UInt64 Dr6; + UInt64 Dr7; + UInt64 Rax; + UInt64 Rcx; + UInt64 Rdx; + UInt64 Rbx; + UInt64 Rsp; + UInt64 Rbp; + UInt64 Rsi; + UInt64 Rdi; + UInt64 R8; + UInt64 R9; + UInt64 R10; + UInt64 R11; + UInt64 R12; + UInt64 R13; + UInt64 R14; + UInt64 R15; + UInt64 Rip; + union { + XMM_SAVE_AREA32 FltSave; + struct { + Fp128 Header[2]; + Fp128 Legacy[8]; + Fp128 Xmm0; + Fp128 Xmm1; + Fp128 Xmm2; + Fp128 Xmm3; + Fp128 Xmm4; + Fp128 Xmm5; + Fp128 Xmm6; + Fp128 Xmm7; + Fp128 Xmm8; + Fp128 Xmm9; + Fp128 Xmm10; + Fp128 Xmm11; + Fp128 Xmm12; + Fp128 Xmm13; + Fp128 Xmm14; + Fp128 Xmm15; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + Fp128 VectorRegister[26]; + UInt64 VectorControl; + UInt64 DebugControl; + UInt64 LastBranchToRip; + UInt64 LastBranchFromRip; + UInt64 LastExceptionToRip; + UInt64 LastExceptionFromRip; + + void SetIp(UIntNative ip) { Rip = ip; } + void SetSp(UIntNative sp) { Rsp = sp; } +#ifdef UNIX_AMD64_ABI + void SetArg0Reg(UIntNative val) { Rdi = val; } + void SetArg1Reg(UIntNative val) { Rsi = val; } +#else // UNIX_AMD64_ABI + void SetArg0Reg(UIntNative val) { Rcx = val; } + void SetArg1Reg(UIntNative val) { Rdx = val; } +#endif // UNIX_AMD64_ABI + UIntNative GetIp() { return Rip; } + UIntNative GetSp() { return Rsp; } +} CONTEXT, *PCONTEXT; +#elif defined(HOST_ARM) + +#define ARM_MAX_BREAKPOINTS 8 +#define ARM_MAX_WATCHPOINTS 1 + +typedef struct DECLSPEC_ALIGN(8) _CONTEXT { + UInt32 ContextFlags; + UInt32 R0; + UInt32 R1; + UInt32 R2; + UInt32 R3; + UInt32 R4; + UInt32 R5; + UInt32 R6; + UInt32 R7; + UInt32 R8; + UInt32 R9; + UInt32 R10; + UInt32 R11; + UInt32 R12; + UInt32 Sp; // R13 + UInt32 Lr; // R14 + UInt32 Pc; // R15 + UInt32 Cpsr; + UInt32 Fpscr; + UInt32 Padding; + union { + Fp128 Q[16]; + UInt64 D[32]; + UInt32 S[32]; + } DUMMYUNIONNAME; + UInt32 Bvr[ARM_MAX_BREAKPOINTS]; + UInt32 Bcr[ARM_MAX_BREAKPOINTS]; + UInt32 Wvr[ARM_MAX_WATCHPOINTS]; + UInt32 Wcr[ARM_MAX_WATCHPOINTS]; + UInt32 Padding2[2]; + + void SetIp(UIntNative ip) { Pc = ip; } + void SetArg0Reg(UIntNative val) { R0 = val; } + void SetArg1Reg(UIntNative val) { R1 = val; } + UIntNative GetIp() { return Pc; } + UIntNative GetLr() { return Lr; } +} CONTEXT, *PCONTEXT; + +#elif defined(HOST_X86) +#define SIZE_OF_80387_REGISTERS 80 +#define MAXIMUM_SUPPORTED_EXTENSION 512 + +typedef struct _FLOATING_SAVE_AREA { + UInt32 ControlWord; + UInt32 StatusWord; + UInt32 TagWord; + UInt32 ErrorOffset; + UInt32 ErrorSelector; + UInt32 
DataOffset; + UInt32 DataSelector; + UInt8 RegisterArea[SIZE_OF_80387_REGISTERS]; + UInt32 Cr0NpxState; +} FLOATING_SAVE_AREA; + +#include "pshpack4.h" +typedef struct _CONTEXT { + UInt32 ContextFlags; + UInt32 Dr0; + UInt32 Dr1; + UInt32 Dr2; + UInt32 Dr3; + UInt32 Dr6; + UInt32 Dr7; + FLOATING_SAVE_AREA FloatSave; + UInt32 SegGs; + UInt32 SegFs; + UInt32 SegEs; + UInt32 SegDs; + UInt32 Edi; + UInt32 Esi; + UInt32 Ebx; + UInt32 Edx; + UInt32 Ecx; + UInt32 Eax; + UInt32 Ebp; + UInt32 Eip; + UInt32 SegCs; + UInt32 EFlags; + UInt32 Esp; + UInt32 SegSs; + UInt8 ExtendedRegisters[MAXIMUM_SUPPORTED_EXTENSION]; + + void SetIp(UIntNative ip) { Eip = ip; } + void SetSp(UIntNative sp) { Esp = sp; } + void SetArg0Reg(UIntNative val) { Ecx = val; } + void SetArg1Reg(UIntNative val) { Edx = val; } + UIntNative GetIp() { return Eip; } + UIntNative GetSp() { return Esp; } +} CONTEXT, *PCONTEXT; +#include "poppack.h" + +#elif defined(HOST_ARM64) + +// Specify the number of breakpoints and watchpoints that the OS +// will track. Architecturally, ARM64 supports up to 16. In practice, +// however, almost no one implements more than 4 of each. + +#define ARM64_MAX_BREAKPOINTS 8 +#define ARM64_MAX_WATCHPOINTS 2 + +typedef struct _NEON128 { + UInt64 Low; + Int64 High; +} NEON128, *PNEON128; + +typedef struct DECLSPEC_ALIGN(16) _CONTEXT { + // + // Control flags. + // + UInt32 ContextFlags; + + // + // Integer registers + // + UInt32 Cpsr; // NZVF + DAIF + CurrentEL + SPSel + union { + struct { + UInt64 X0; + UInt64 X1; + UInt64 X2; + UInt64 X3; + UInt64 X4; + UInt64 X5; + UInt64 X6; + UInt64 X7; + UInt64 X8; + UInt64 X9; + UInt64 X10; + UInt64 X11; + UInt64 X12; + UInt64 X13; + UInt64 X14; + UInt64 X15; + UInt64 X16; + UInt64 X17; + UInt64 X18; + UInt64 X19; + UInt64 X20; + UInt64 X21; + UInt64 X22; + UInt64 X23; + UInt64 X24; + UInt64 X25; + UInt64 X26; + UInt64 X27; + UInt64 X28; +#pragma warning(push) +#pragma warning(disable:4201) // nameless struct + }; + UInt64 X[29]; + }; +#pragma warning(pop) + UInt64 Fp; // X29 + UInt64 Lr; // X30 + UInt64 Sp; + UInt64 Pc; + + // + // Floating Point/NEON Registers + // + NEON128 V[32]; + UInt32 Fpcr; + UInt32 Fpsr; + + // + // Debug registers + // + UInt32 Bcr[ARM64_MAX_BREAKPOINTS]; + UInt64 Bvr[ARM64_MAX_BREAKPOINTS]; + UInt32 Wcr[ARM64_MAX_WATCHPOINTS]; + UInt64 Wvr[ARM64_MAX_WATCHPOINTS]; + + void SetIp(UIntNative ip) { Pc = ip; } + void SetArg0Reg(UIntNative val) { X0 = val; } + void SetArg1Reg(UIntNative val) { X1 = val; } + UIntNative GetIp() { return Pc; } + UIntNative GetLr() { return Lr; } +} CONTEXT, *PCONTEXT; + +#elif defined(HOST_WASM) + +typedef struct DECLSPEC_ALIGN(8) _CONTEXT { + // TODO: Figure out if WebAssembly has a meaningful context available + void SetIp(UIntNative ip) { } + void SetArg0Reg(UIntNative val) { } + void SetArg1Reg(UIntNative val) { } + UIntNative GetIp() { return 0; } +} CONTEXT, *PCONTEXT; +#endif + +#define EXCEPTION_MAXIMUM_PARAMETERS 15 // maximum number of exception parameters + +typedef struct _EXCEPTION_RECORD32 { + UInt32 ExceptionCode; + UInt32 ExceptionFlags; + UIntNative ExceptionRecord; + UIntNative ExceptionAddress; + UInt32 NumberParameters; + UIntNative ExceptionInformation[EXCEPTION_MAXIMUM_PARAMETERS]; +} EXCEPTION_RECORD, *PEXCEPTION_RECORD; + +typedef struct _EXCEPTION_POINTERS { + PEXCEPTION_RECORD ExceptionRecord; + PCONTEXT ContextRecord; +} EXCEPTION_POINTERS, *PEXCEPTION_POINTERS; + +typedef Int32 (__stdcall *PVECTORED_EXCEPTION_HANDLER)( + PEXCEPTION_POINTERS ExceptionInfo + ); + +#define 
EXCEPTION_CONTINUE_EXECUTION (-1) +#define EXCEPTION_CONTINUE_SEARCH (0) +#define EXCEPTION_EXECUTE_HANDLER (1) + +typedef enum _EXCEPTION_DISPOSITION { + ExceptionContinueExecution, + ExceptionContinueSearch, + ExceptionNestedException, + ExceptionCollidedUnwind +} EXCEPTION_DISPOSITION; + +#define STATUS_ACCESS_VIOLATION ((UInt32 )0xC0000005L) +#define STATUS_STACK_OVERFLOW ((UInt32 )0xC00000FDL) +#define STATUS_REDHAWK_NULL_REFERENCE ((UInt32 )0x00000000L) +#define STATUS_REDHAWK_WRITE_BARRIER_NULL_REFERENCE ((UInt32 )0x00000042L) + +#ifdef TARGET_UNIX +#define NULL_AREA_SIZE (4*1024) +#else +#define NULL_AREA_SIZE (64*1024) +#endif + +//#endif // !DACCESS_COMPILE +#endif // !_INC_WINDOWS + + + +#ifndef DACCESS_COMPILE +#ifndef _INC_WINDOWS + +typedef UInt32 (WINAPI *PTHREAD_START_ROUTINE)(_In_opt_ void* lpThreadParameter); +typedef IntNative (WINAPI *FARPROC)(); + +#ifndef __GCENV_BASE_INCLUDED__ +#define TRUE 1 +#define FALSE 0 +#endif // !__GCENV_BASE_INCLUDED__ + +#define INVALID_HANDLE_VALUE ((HANDLE)(IntNative)-1) + +#define DLL_PROCESS_ATTACH 1 +#define DLL_THREAD_ATTACH 2 +#define DLL_THREAD_DETACH 3 +#define DLL_PROCESS_DETACH 0 +#define DLL_PROCESS_VERIFIER 4 + +#define INFINITE 0xFFFFFFFF + +#define DUPLICATE_CLOSE_SOURCE 0x00000001 +#define DUPLICATE_SAME_ACCESS 0x00000002 + +#define GENERIC_READ 0x80000000 +#define GENERIC_WRITE 0x40000000 +#define GENERIC_EXECUTE 0x20000000 +#define GENERIC_ALL 0x10000000 + +#define FILE_SHARE_READ 0x00000001 +#define FILE_SHARE_WRITE 0x00000002 +#define FILE_SHARE_DELETE 0x00000004 + +#define FILE_ATTRIBUTE_READONLY 0x00000001 +#define FILE_ATTRIBUTE_HIDDEN 0x00000002 +#define FILE_ATTRIBUTE_SYSTEM 0x00000004 +#define FILE_ATTRIBUTE_DIRECTORY 0x00000010 +#define FILE_ATTRIBUTE_ARCHIVE 0x00000020 +#define FILE_ATTRIBUTE_DEVICE 0x00000040 +#define FILE_ATTRIBUTE_NORMAL 0x00000080 +#define FILE_ATTRIBUTE_TEMPORARY 0x00000100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x00000200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x00000400 +#define FILE_ATTRIBUTE_COMPRESSED 0x00000800 +#define FILE_ATTRIBUTE_OFFLINE 0x00001000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x00002000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x00004000 + +#define CREATE_NEW 1 +#define CREATE_ALWAYS 2 +#define OPEN_EXISTING 3 +#define OPEN_ALWAYS 4 +#define TRUNCATE_EXISTING 5 + +#define FILE_BEGIN 0 +#define FILE_CURRENT 1 +#define FILE_END 2 + +#define PAGE_NOACCESS 0x01 +#define PAGE_READONLY 0x02 +#define PAGE_READWRITE 0x04 +#define PAGE_WRITECOPY 0x08 +#define PAGE_EXECUTE 0x10 +#define PAGE_EXECUTE_READ 0x20 +#define PAGE_EXECUTE_READWRITE 0x40 +#define PAGE_EXECUTE_WRITECOPY 0x80 +#define PAGE_GUARD 0x100 +#define PAGE_NOCACHE 0x200 +#define PAGE_WRITECOMBINE 0x400 +#define MEM_COMMIT 0x1000 +#define MEM_RESERVE 0x2000 +#define MEM_DECOMMIT 0x4000 +#define MEM_RELEASE 0x8000 +#define MEM_FREE 0x10000 +#define MEM_PRIVATE 0x20000 +#define MEM_MAPPED 0x40000 +#define MEM_RESET 0x80000 +#define MEM_TOP_DOWN 0x100000 +#define MEM_WRITE_WATCH 0x200000 +#define MEM_PHYSICAL 0x400000 +#define MEM_LARGE_PAGES 0x20000000 +#define MEM_4MB_PAGES 0x80000000 + +#define WAIT_OBJECT_0 0 +#define WAIT_TIMEOUT 258 +#define WAIT_FAILED 0xFFFFFFFF + +#define CREATE_SUSPENDED 0x00000004 +#define THREAD_PRIORITY_NORMAL 0 +#define THREAD_PRIORITY_HIGHEST 2 + +#define NOERROR 0x0 + +#define SUSPENDTHREAD_FAILED 0xFFFFFFFF +#define RESUMETHREAD_FAILED 0xFFFFFFFF + +#define ERROR_INSUFFICIENT_BUFFER 122 +#define ERROR_TIMEOUT 1460 +#define ERROR_ALREADY_EXISTS 183 + +#define 
GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT 0x00000002 +#define GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS 0x00000004 + +#endif // !_INC_WINDOWS +#endif // !DACCESS_COMPILE + +typedef UInt64 REGHANDLE; +typedef UInt64 TRACEHANDLE; + +#ifndef _EVNTPROV_H_ +struct EVENT_DATA_DESCRIPTOR +{ + UInt64 Ptr; + UInt32 Size; + UInt32 Reserved; +}; + +struct EVENT_DESCRIPTOR +{ + UInt16 Id; + UInt8 Version; + UInt8 Channel; + UInt8 Level; + UInt8 Opcode; + UInt16 Task; + UInt64 Keyword; + +}; + +struct EVENT_FILTER_DESCRIPTOR +{ + UInt64 Ptr; + UInt32 Size; + UInt32 Type; +}; + +__forceinline +void +EventDataDescCreate(_Out_ EVENT_DATA_DESCRIPTOR * EventDataDescriptor, _In_opt_ const void * DataPtr, UInt32 DataSize) +{ + EventDataDescriptor->Ptr = (UInt64)DataPtr; + EventDataDescriptor->Size = DataSize; + EventDataDescriptor->Reserved = 0; +} +#endif // _EVNTPROV_H_ + +extern GCSystemInfo g_RhSystemInfo; + +#ifdef TARGET_UNIX +#define REDHAWK_PALIMPORT extern "C" +#define REDHAWK_PALEXPORT extern "C" +#define REDHAWK_PALAPI +#else +#define REDHAWK_PALIMPORT EXTERN_C +#define REDHAWK_PALAPI __stdcall +#endif // TARGET_UNIX + +bool InitializeSystemInfo(); + +#ifndef DACCESS_COMPILE + +#ifdef _DEBUG +#define CaptureStackBackTrace RtlCaptureStackBackTrace +#endif + +#ifndef _INC_WINDOWS +// Include the list of external functions we wish to access. If we do our job 100% then it will be +// possible to link without any direct reference to any Win32 library. +#include "PalRedhawkFunctions.h" +#endif // !_INC_WINDOWS +#endif // !DACCESS_COMPILE + +// The Redhawk PAL must be initialized before any of its exports can be called. Returns true for a successful +// initialization and false on failure. +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalInit(); + +// Given a mask of capabilities return true if all of them are supported by the current PAL. +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalHasCapability(PalCapability capability); + +// Given the OS handle of a loaded module, compute the upper and lower virtual address bounds (inclusive). +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalGetModuleBounds(HANDLE hOsHandle, _Out_ UInt8 ** ppLowerBound, _Out_ UInt8 ** ppUpperBound); + +typedef struct _GUID GUID; +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalGetPDBInfo(HANDLE hOsHandle, _Out_ GUID * pGuidSignature, _Out_ UInt32 * pdwAge, _Out_writes_z_(cchPath) WCHAR * wszPath, Int32 cchPath); + +#ifndef APP_LOCAL_RUNTIME +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalGetThreadContext(HANDLE hThread, _Out_ PAL_LIMITED_CONTEXT * pCtx); +#endif + +REDHAWK_PALIMPORT Int32 REDHAWK_PALAPI PalGetProcessCpuCount(); + +REDHAWK_PALIMPORT UInt32 REDHAWK_PALAPI PalReadFileContents(_In_z_ const TCHAR *, _Out_writes_all_(maxBytesToRead) char * buff, _In_ UInt32 maxBytesToRead); + +// Retrieves the entire range of memory dedicated to the calling thread's stack. This does +// not get the current dynamic bounds of the stack, which can be significantly smaller than +// the maximum bounds. +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut); + +// Return value: number of characters in name string +REDHAWK_PALIMPORT Int32 PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase); + +#if _WIN32 + +// Various intrinsic declarations needed for the PalGetCurrentTEB implementation below. 
+#if defined(HOST_X86) +EXTERN_C unsigned long __readfsdword(unsigned long Offset); +#pragma intrinsic(__readfsdword) +#elif defined(HOST_AMD64) +EXTERN_C unsigned __int64 __readgsqword(unsigned long Offset); +#pragma intrinsic(__readgsqword) +#elif defined(HOST_ARM) +EXTERN_C unsigned int _MoveFromCoprocessor(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int); +#pragma intrinsic(_MoveFromCoprocessor) +#elif defined(HOST_ARM64) +EXTERN_C unsigned __int64 __getReg(int); +#pragma intrinsic(__getReg) +#else +#error Unsupported architecture +#endif + +// Retrieves the OS TEB for the current thread. +inline UInt8 * PalNtCurrentTeb() +{ +#if defined(HOST_X86) + return (UInt8*)__readfsdword(0x18); +#elif defined(HOST_AMD64) + return (UInt8*)__readgsqword(0x30); +#elif defined(HOST_ARM) + return (UInt8*)_MoveFromCoprocessor(15, 0, 13, 0, 2); +#elif defined(HOST_ARM64) + return (UInt8*)__getReg(18); +#else +#error Unsupported architecture +#endif +} + +// Offsets of ThreadLocalStoragePointer in the TEB. +#if defined(HOST_64BIT) +#define OFFSETOF__TEB__ThreadLocalStoragePointer 0x58 +#else +#define OFFSETOF__TEB__ThreadLocalStoragePointer 0x2c +#endif + +#else // _WIN32 + +inline UInt8 * PalNtCurrentTeb() +{ + // UNIXTODO: Implement PalNtCurrentTeb + return NULL; +} + +#define OFFSETOF__TEB__ThreadLocalStoragePointer 0 + +#endif // _WIN32 + +// +// Compiler intrinsic definitions. In the interest of performance the PAL doesn't provide exports of these +// (that would defeat the purpose of having an intrinsic in the first place). Instead we place the necessary +// compiler linkage directly inline in this header. As a result this section may have platform specific +// conditional compilation (upto and including defining an export of functionality that isn't a supported +// intrinsic on that platform). 
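// Illustrative sketch, not part of the original header: how PalNtCurrentTeb() and
// OFFSETOF__TEB__ThreadLocalStoragePointer above could be combined on a Windows host to
// reach the per-thread TLS array. The helper name is hypothetical and the #if 0 guard
// keeps the sketch out of any build.
#if 0
inline void** SketchGetThreadLocalStoragePointer()
{
    UInt8* pTeb = PalNtCurrentTeb();
    // ThreadLocalStoragePointer is itself a pointer-sized field inside the TEB.
    return *(void***)(pTeb + OFFSETOF__TEB__ThreadLocalStoragePointer);
}
#endif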
+// + +EXTERN_C void * __cdecl _alloca(size_t); +#pragma intrinsic(_alloca) + +REDHAWK_PALIMPORT _Ret_maybenull_ _Post_writable_byte_size_(size) void* REDHAWK_PALAPI PalVirtualAlloc(_In_opt_ void* pAddress, UIntNative size, UInt32 allocationType, UInt32 protect); +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualFree(_In_ void* pAddress, UIntNative size, UInt32 freeType); +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddress, UIntNative size, UInt32 protect); +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalSleep(UInt32 milliseconds); +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalSwitchToThread(); +REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName); +REDHAWK_PALIMPORT UInt32 REDHAWK_PALAPI PalGetTickCount(); +REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalCreateFileW(_In_z_ LPCWSTR pFileName, uint32_t desiredAccess, uint32_t shareMode, _In_opt_ void* pSecurityAttributes, uint32_t creationDisposition, uint32_t flagsAndAttributes, HANDLE hTemplateFile); +REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalCreateLowMemoryNotification(); +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalTerminateCurrentProcess(UInt32 exitCode); +REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer); + +#ifndef APP_LOCAL_RUNTIME + +#ifdef TARGET_UNIX +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalSetHardwareExceptionHandler(PHARDWARE_EXCEPTION_HANDLER handler); +#else +REDHAWK_PALIMPORT void* REDHAWK_PALAPI PalAddVectoredExceptionHandler(UInt32 firstHandler, _In_ PVECTORED_EXCEPTION_HANDLER vectoredHandler); +#endif + +#endif + + +typedef UInt32 (__stdcall *BackgroundCallback)(_In_opt_ void* pCallbackContext); +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); + +typedef UInt32_BOOL (*PalHijackCallback)(HANDLE hThread, _In_ PAL_LIMITED_CONTEXT* pThreadContext, _In_opt_ void* pCallbackContext); +REDHAWK_PALIMPORT UInt32 REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_ PalHijackCallback callback, _In_opt_ void* pCallbackContext); + +#ifdef FEATURE_ETW +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalEventEnabled(REGHANDLE regHandle, _In_ const EVENT_DESCRIPTOR* eventDescriptor); +#endif + +REDHAWK_PALIMPORT _Ret_maybenull_ void* REDHAWK_PALAPI PalSetWerDataBuffer(_In_ void* pNewBuffer); + +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, UInt32 templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut); +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalFreeThunksFromTemplate(_In_ void *pBaseAddress); + +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalMarkThunksAsValidCallTargets( + void *virtualAddress, + int thunkSize, + int thunksPerBlock, + int thunkBlockSize, + int thunkBlocksPerMapping); + +REDHAWK_PALIMPORT UInt32 REDHAWK_PALAPI PalCompatibleWaitAny(UInt32_BOOL alertable, UInt32 timeout, UInt32 count, HANDLE* pHandles, UInt32_BOOL allowReentrantWait); + +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalAttachThread(void* thread); +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalDetachThread(void* thread); + +REDHAWK_PALIMPORT UInt64 PalGetCurrentThreadIdForLogging(); + +REDHAWK_PALIMPORT void PalPrintFatalError(const char* message); + +#ifdef TARGET_UNIX 
+REDHAWK_PALIMPORT Int32 __cdecl _stricmp(const char *string1, const char *string2); +#endif // TARGET_UNIX + +#ifdef UNICODE +#define _tcsicmp _wcsicmp +#else +#define _tcsicmp _stricmp +#endif + +#if defined(HOST_X86) || defined(HOST_AMD64) +REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI getcpuid(uint32_t arg1, unsigned char result[16]); +REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI getextcpuid(uint32_t arg1, uint32_t arg2, unsigned char result[16]); +REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport(); +REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled(); +#endif // defined(HOST_X86) || defined(HOST_AMD64) + +#include "PalRedhawkInline.h" + +#endif // !PAL_REDHAWK_INCLUDED diff --git a/src/coreclr/src/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/src/nativeaot/Runtime/PalRedhawkCommon.h new file mode 100644 index 0000000000000..d1933865af2a8 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/PalRedhawkCommon.h @@ -0,0 +1,172 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Provide common definitions between the Redhawk and the Redhawk PAL implementation. This header file is used +// (rather than PalRedhawk.h) since the PAL implementation is built in a different environment than Redhawk +// code. For instance both environments may provide a definition of various common macros such as NULL. +// +// This header contains only environment neutral definitions (i.e. using only base C++ types and compositions +// of those types) and can thus be included from either environment without issue. +// + +#ifndef __PAL_REDHAWK_COMMON_INCLUDED +#define __PAL_REDHAWK_COMMON_INCLUDED + +#include "rhassert.h" + +// We define the notion of capabilities: optional functionality that the PAL may expose. Use +// PalHasCapability() with the constants below to determine what is supported at runtime. +enum PalCapability +{ + WriteWatchCapability = 0x00000001, // GetWriteWatch() and friends + LowMemoryNotificationCapability = 0x00000002, // CreateMemoryResourceNotification() and friends +}; + +#ifndef DECLSPEC_ALIGN +#ifdef _MSC_VER +#define DECLSPEC_ALIGN(x) __declspec(align(x)) +#else +#define DECLSPEC_ALIGN(x) __attribute__((aligned(x))) +#endif +#endif // DECLSPEC_ALIGN + +#ifdef HOST_AMD64 +#define AMD64_ALIGN_16 DECLSPEC_ALIGN(16) +#else // HOST_AMD64 +#define AMD64_ALIGN_16 +#endif // HOST_AMD64 + +struct AMD64_ALIGN_16 Fp128 { + UInt64 Low; + Int64 High; +}; + + +struct PAL_LIMITED_CONTEXT +{ + // Includes special registers, callee saved registers and general purpose registers used to return values from functions (not floating point return registers) +#ifdef TARGET_ARM + UIntNative R0; + UIntNative R4; + UIntNative R5; + UIntNative R6; + UIntNative R7; + UIntNative R8; + UIntNative R9; + UIntNative R10; + UIntNative R11; + + UIntNative IP; + UIntNative SP; + UIntNative LR; + + UInt64 D[16-8]; // D8 .. D15 registers (D16 .. 
D31 are volatile according to the ABI spec) + + UIntNative GetIp() const { return IP; } + UIntNative GetSp() const { return SP; } + UIntNative GetFp() const { return R7; } + UIntNative GetLr() const { return LR; } + void SetIp(UIntNative ip) { IP = ip; } + void SetSp(UIntNative sp) { SP = sp; } +#elif defined(TARGET_ARM64) + UIntNative FP; + UIntNative LR; + + UIntNative X0; + UIntNative X1; + UIntNative X19; + UIntNative X20; + UIntNative X21; + UIntNative X22; + UIntNative X23; + UIntNative X24; + UIntNative X25; + UIntNative X26; + UIntNative X27; + UIntNative X28; + + UIntNative SP; + UIntNative IP; + + UInt64 D[16 - 8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved + // (V0-V7 and V16-V31 are not preserved according to the ABI spec). + + + UIntNative GetIp() const { return IP; } + UIntNative GetSp() const { return SP; } + UIntNative GetFp() const { return FP; } + UIntNative GetLr() const { return LR; } + void SetIp(UIntNative ip) { IP = ip; } + void SetSp(UIntNative sp) { SP = sp; } +#elif defined(UNIX_AMD64_ABI) + // Param regs: rdi, rsi, rdx, rcx, r8, r9, scratch: rax, rdx (both return val), preserved: rbp, rbx, r12-r15 + UIntNative IP; + UIntNative Rsp; + UIntNative Rbp; + UIntNative Rax; + UIntNative Rbx; + UIntNative Rdx; + UIntNative R12; + UIntNative R13; + UIntNative R14; + UIntNative R15; + + UIntNative GetIp() const { return IP; } + UIntNative GetSp() const { return Rsp; } + void SetIp(UIntNative ip) { IP = ip; } + void SetSp(UIntNative sp) { Rsp = sp; } + UIntNative GetFp() const { return Rbp; } +#elif defined(TARGET_X86) || defined(TARGET_AMD64) + UIntNative IP; + UIntNative Rsp; + UIntNative Rbp; + UIntNative Rdi; + UIntNative Rsi; + UIntNative Rax; + UIntNative Rbx; +#ifdef TARGET_AMD64 + UIntNative R12; + UIntNative R13; + UIntNative R14; + UIntNative R15; + UIntNative __explicit_padding__; + Fp128 Xmm6; + Fp128 Xmm7; + Fp128 Xmm8; + Fp128 Xmm9; + Fp128 Xmm10; + Fp128 Xmm11; + Fp128 Xmm12; + Fp128 Xmm13; + Fp128 Xmm14; + Fp128 Xmm15; +#endif // TARGET_AMD64 + + UIntNative GetIp() const { return IP; } + UIntNative GetSp() const { return Rsp; } + UIntNative GetFp() const { return Rbp; } + void SetIp(UIntNative ip) { IP = ip; } + void SetSp(UIntNative sp) { Rsp = sp; } +#else // TARGET_ARM + UIntNative IP; + + UIntNative GetIp() const { PORTABILITY_ASSERT("GetIp"); return 0; } + UIntNative GetSp() const { PORTABILITY_ASSERT("GetSp"); return 0; } + UIntNative GetFp() const { PORTABILITY_ASSERT("GetFp"); return 0; } + void SetIp(UIntNative ip) { PORTABILITY_ASSERT("SetIp"); } + void SetSp(UIntNative sp) { PORTABILITY_ASSERT("GetSp"); } +#endif // TARGET_ARM +}; + +void RuntimeThreadShutdown(void* thread); + +#ifdef TARGET_UNIX +typedef void (__fastcall * ThreadExitCallback)(); + +extern ThreadExitCallback g_threadExitCallback; + +typedef Int32 (*PHARDWARE_EXCEPTION_HANDLER)(UIntNative faultCode, UIntNative faultAddress, PAL_LIMITED_CONTEXT* palContext, UIntNative* arg0Reg, UIntNative* arg1Reg); +#endif + +#endif // __PAL_REDHAWK_COMMON_INCLUDED diff --git a/src/coreclr/src/nativeaot/Runtime/PalRedhawkFunctions.h b/src/coreclr/src/nativeaot/Runtime/PalRedhawkFunctions.h new file mode 100644 index 0000000000000..bf12d74f5e3fc --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/PalRedhawkFunctions.h @@ -0,0 +1,186 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
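The header that follows wraps each Win32 API the runtime calls: the import is declared directly (so no windows.h is required) and then forwarded through a Pal-prefixed inline. A hedged illustration of the shape, using an API chosen only for the example:

    extern "C" UInt32 __stdcall GetCurrentThreadId();
    inline UInt32 PalGetCurrentThreadId()
    {
        return GetCurrentThreadId();
    }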
+ +extern "C" UInt16 __stdcall CaptureStackBackTrace(UInt32, UInt32, void*, UInt32*); +inline UInt16 PalCaptureStackBackTrace(UInt32 arg1, UInt32 arg2, void* arg3, UInt32* arg4) +{ + return CaptureStackBackTrace(arg1, arg2, arg3, arg4); +} + +extern "C" UInt32_BOOL __stdcall CloseHandle(HANDLE); +inline UInt32_BOOL PalCloseHandle(HANDLE arg1) +{ + return CloseHandle(arg1); +} + +extern "C" UInt32_BOOL __stdcall CreateDirectoryW(LPCWSTR, LPSECURITY_ATTRIBUTES); +inline UInt32_BOOL PalCreateDirectoryW(LPCWSTR arg1, LPSECURITY_ATTRIBUTES arg2) +{ + return CreateDirectoryW(arg1, arg2); +} + +extern "C" void __stdcall DeleteCriticalSection(CRITICAL_SECTION *); +inline void PalDeleteCriticalSection(CRITICAL_SECTION * arg1) +{ + DeleteCriticalSection(arg1); +} + +extern "C" UInt32_BOOL __stdcall DuplicateHandle(HANDLE, HANDLE, HANDLE, HANDLE *, UInt32, UInt32_BOOL, UInt32); +inline UInt32_BOOL PalDuplicateHandle(HANDLE arg1, HANDLE arg2, HANDLE arg3, HANDLE * arg4, UInt32 arg5, UInt32_BOOL arg6, UInt32 arg7) +{ + return DuplicateHandle(arg1, arg2, arg3, arg4, arg5, arg6, arg7); +} + +extern "C" void __stdcall EnterCriticalSection(CRITICAL_SECTION *); +inline void PalEnterCriticalSection(CRITICAL_SECTION * arg1) +{ + EnterCriticalSection(arg1); +} + +extern "C" UInt32 __stdcall EventRegister(const GUID *, void *, void *, REGHANDLE *); +inline UInt32 PalEventRegister(const GUID * arg1, void * arg2, void * arg3, REGHANDLE * arg4) +{ + return EventRegister(arg1, arg2, arg3, arg4); +} + +extern "C" UInt32 __stdcall EventUnregister(REGHANDLE); +inline UInt32 PalEventUnregister(REGHANDLE arg1) +{ + return EventUnregister(arg1); +} + +extern "C" UInt32 __stdcall EventWrite(REGHANDLE, const EVENT_DESCRIPTOR *, UInt32, EVENT_DATA_DESCRIPTOR *); +inline UInt32 PalEventWrite(REGHANDLE arg1, const EVENT_DESCRIPTOR * arg2, UInt32 arg3, EVENT_DATA_DESCRIPTOR * arg4) +{ + return EventWrite(arg1, arg2, arg3, arg4); +} + +extern "C" void __stdcall FlushProcessWriteBuffers(); +inline void PalFlushProcessWriteBuffers() +{ + FlushProcessWriteBuffers(); +} + +extern "C" HANDLE __stdcall GetCurrentProcess(); +inline HANDLE PalGetCurrentProcess() +{ + return GetCurrentProcess(); +} + +extern "C" UInt32 __stdcall GetCurrentProcessId(); +inline UInt32 PalGetCurrentProcessId() +{ + return GetCurrentProcessId(); +} + +extern "C" HANDLE __stdcall GetCurrentThread(); +inline HANDLE PalGetCurrentThread() +{ + return GetCurrentThread(); +} + +#ifdef UNICODE +extern "C" UInt32 __stdcall GetEnvironmentVariableW(__in_z_opt LPCWSTR, __out_z_opt LPWSTR, UInt32); +inline UInt32 PalGetEnvironmentVariable(__in_z_opt LPCWSTR arg1, __out_z_opt LPWSTR arg2, UInt32 arg3) +{ + return GetEnvironmentVariableW(arg1, arg2, arg3); +} +#else +extern "C" UInt32 __stdcall GetEnvironmentVariableA(__in_z_opt LPCSTR, __out_z_opt LPSTR, UInt32); +inline UInt32 PalGetEnvironmentVariable(__in_z_opt LPCSTR arg1, __out_z_opt LPSTR arg2, UInt32 arg3) +{ + return GetEnvironmentVariableA(arg1, arg2, arg3); +} +#endif + +extern "C" void * __stdcall GetProcAddress(HANDLE, const char *); +inline void * PalGetProcAddress(HANDLE arg1, const char * arg2) +{ + return GetProcAddress(arg1, arg2); +} + +extern "C" UInt32_BOOL __stdcall InitializeCriticalSectionEx(CRITICAL_SECTION *, UInt32, UInt32); +inline UInt32_BOOL PalInitializeCriticalSectionEx(CRITICAL_SECTION * arg1, UInt32 arg2, UInt32 arg3) +{ + return InitializeCriticalSectionEx(arg1, arg2, arg3); +} + +extern "C" UInt32_BOOL __stdcall IsDebuggerPresent(); +inline UInt32_BOOL PalIsDebuggerPresent() +{ + 
return IsDebuggerPresent(); +} + +extern "C" void __stdcall LeaveCriticalSection(CRITICAL_SECTION *); +inline void PalLeaveCriticalSection(CRITICAL_SECTION * arg1) +{ + LeaveCriticalSection(arg1); +} + +extern "C" HANDLE __stdcall LoadLibraryExW(const WCHAR *, HANDLE, UInt32); +inline HANDLE PalLoadLibraryExW(const WCHAR * arg1, HANDLE arg2, UInt32 arg3) +{ + return LoadLibraryExW(arg1, arg2, arg3); +} + +extern "C" UInt32_BOOL __stdcall QueryPerformanceCounter(LARGE_INTEGER *); +inline UInt32_BOOL PalQueryPerformanceCounter(LARGE_INTEGER * arg1) +{ + return QueryPerformanceCounter(arg1); +} + +extern "C" UInt32_BOOL __stdcall QueryPerformanceFrequency(LARGE_INTEGER *); +inline UInt32_BOOL PalQueryPerformanceFrequency(LARGE_INTEGER * arg1) +{ + return QueryPerformanceFrequency(arg1); +} + +extern "C" void __stdcall RaiseException(UInt32, UInt32, UInt32, const UInt32 *); +inline void PalRaiseException(UInt32 arg1, UInt32 arg2, UInt32 arg3, const UInt32 * arg4) +{ + RaiseException(arg1, arg2, arg3, arg4); +} + +extern "C" UInt32_BOOL __stdcall ReleaseMutex(HANDLE); +inline UInt32_BOOL PalReleaseMutex(HANDLE arg1) +{ + return ReleaseMutex(arg1); +} + +extern "C" UInt32_BOOL __stdcall ResetEvent(HANDLE); +inline UInt32_BOOL PalResetEvent(HANDLE arg1) +{ + return ResetEvent(arg1); +} + +extern "C" UInt32_BOOL __stdcall SetEvent(HANDLE); +inline UInt32_BOOL PalSetEvent(HANDLE arg1) +{ + return SetEvent(arg1); +} + +extern "C" void __stdcall TerminateProcess(HANDLE, UInt32); +inline void PalTerminateProcess(HANDLE arg1, UInt32 arg2) +{ + TerminateProcess(arg1, arg2); +} + +extern "C" UInt32 __stdcall WaitForSingleObjectEx(HANDLE, UInt32, UInt32_BOOL); +inline UInt32 PalWaitForSingleObjectEx(HANDLE arg1, UInt32 arg2, UInt32_BOOL arg3) +{ + return WaitForSingleObjectEx(arg1, arg2, arg3); +} + +#ifdef PAL_REDHAWK_INCLUDED +extern "C" void __stdcall GetSystemTimeAsFileTime(FILETIME *); +inline void PalGetSystemTimeAsFileTime(FILETIME * arg1) +{ + GetSystemTimeAsFileTime(arg1); +} + +extern "C" void __stdcall RaiseFailFastException(PEXCEPTION_RECORD, PCONTEXT, UInt32); +inline void PalRaiseFailFastException(PEXCEPTION_RECORD arg1, PCONTEXT arg2, UInt32 arg3) +{ + RaiseFailFastException(arg1, arg2, arg3); +} +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/RWLock.cpp b/src/coreclr/src/nativeaot/Runtime/RWLock.cpp new file mode 100644 index 0000000000000..a678c5cedad4c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RWLock.cpp @@ -0,0 +1,267 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// +// RWLock.cpp -- adapted from CLR SimpleRWLock.cpp +// +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "event.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "RuntimeInstance.h" +#include "yieldprocessornormalized.h" + +// Configurable constants used across our spin locks +// Initialization here is necessary so that we have meaningful values before the runtime is started +// These initial values were selected to match the defaults, but anything reasonable is close enough +struct SpinConstants +{ + UInt32 uInitialDuration; + UInt32 uMaximumDuration; + UInt32 uBackoffFactor; + UInt32 uRepetitions; +} g_SpinConstants = { + 50, // dwInitialDuration + 40000, // dwMaximumDuration - ideally (20000 * max(2, numProc)) + 3, // dwBackoffFactor + 10 // dwRepetitions +}; + +ReaderWriterLock::ReadHolder::ReadHolder(ReaderWriterLock * pLock, bool fAcquireLock) : + m_pLock(pLock) +{ +#ifndef DACCESS_COMPILE + m_fLockAcquired = fAcquireLock; + if (fAcquireLock) + m_pLock->AcquireReadLock(); +#else + UNREFERENCED_PARAMETER(fAcquireLock); +#endif // !DACCESS_COMPILE +} + +ReaderWriterLock::ReadHolder::~ReadHolder() +{ +#ifndef DACCESS_COMPILE + if (m_fLockAcquired) + m_pLock->ReleaseReadLock(); +#endif // !DACCESS_COMPILE +} + +ReaderWriterLock::WriteHolder::WriteHolder(ReaderWriterLock * pLock, bool fAcquireLock) : + m_pLock(pLock) +{ +#ifndef DACCESS_COMPILE + m_fLockAcquired = fAcquireLock; + if (fAcquireLock) + m_pLock->AcquireWriteLock(); +#else + UNREFERENCED_PARAMETER(fAcquireLock); +#endif // !DACCESS_COMPILE +} + +ReaderWriterLock::WriteHolder::~WriteHolder() +{ +#ifndef DACCESS_COMPILE + if (m_fLockAcquired) + m_pLock->ReleaseWriteLock(); +#endif // !DACCESS_COMPILE +} + +ReaderWriterLock::ReaderWriterLock(bool fBlockOnGc) : + m_RWLock(0) +#if 0 + , m_WriterWaiting(false) +#endif +{ + m_spinCount = ( +#ifndef DACCESS_COMPILE + (PalGetProcessCpuCount() == 1) ? 0 : +#endif + 4000); + m_fBlockOnGc = fBlockOnGc; +} + + +#ifndef DACCESS_COMPILE + +// Attempt to take the read lock, but do not wait if a writer has the lock. +// Release the lock if successfully acquired. Returns true if the lock was +// taken and released. Returns false if a writer had the lock. +// +// BEWARE: Because this method returns after releasing the lock, you can't +// infer the state of the lock based on the return value. This is currently +// only used to detect if a suspended thread owns the write lock to prevent +// deadlock with the Hijack logic during GC suspension. 
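A minimal sketch of how the RAII holders implemented above are intended to be used; the caller is hypothetical.

    void ExampleReadSharedState(ReaderWriterLock * pLock)
    {
        // The constructor takes the read lock (unless fAcquireLock is passed as false)
        // and the destructor releases it, so early returns cannot leak the lock.
        ReaderWriterLock::ReadHolder read(pLock);

        // ... inspect state protected by pLock here ...
    }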
+// +bool ReaderWriterLock::DangerousTryPulseReadLock() +{ + if (TryAcquireReadLock()) + { + ReleaseReadLock(); + return true; + } + return false; +} + +bool ReaderWriterLock::TryAcquireReadLock() +{ + Int32 RWLock; + + do + { + RWLock = m_RWLock; + if (RWLock == -1) + return false; + ASSERT(RWLock >= 0); + } + while (RWLock != PalInterlockedCompareExchange(&m_RWLock, RWLock+1, RWLock)); + + return true; +} + +void ReaderWriterLock::AcquireReadLock() +{ + if (TryAcquireReadLock()) + return; + + AcquireReadLockWorker(); +} + +void ReaderWriterLock::AcquireReadLockWorker() +{ + UInt32 uSwitchCount = 0; + + for (;;) + { +#if 0 + // @TODO: Validate that we never re-enter the reader lock from a thread that + // already holds it. This scenario will deadlock if there are outstanding + // writers. + + // prevent writers from being starved. This assumes that writers are rare and + // dont hold the lock for a long time. + while (m_WriterWaiting) + { + Int32 spinCount = m_spinCount; + while (spinCount > 0) { + spinCount--; + PalYieldProcessor(); + } + __SwitchToThread(0, ++uSwitchCount); + } +#endif + + if (TryAcquireReadLock()) + return; + + UInt32 uDelay = g_SpinConstants.uInitialDuration; + do + { + if (TryAcquireReadLock()) + return; + + if (g_RhSystemInfo.dwNumberOfProcessors <= 1) + break; + + // Delay by approximately 2*i clock cycles (Pentium III). + YieldProcessorNormalizedForPreSkylakeCount(uDelay); + + // exponential backoff: wait a factor longer in the next iteration + uDelay *= g_SpinConstants.uBackoffFactor; + } + while (uDelay < g_SpinConstants.uMaximumDuration); + + __SwitchToThread(0, ++uSwitchCount); + } +} + +void ReaderWriterLock::ReleaseReadLock() +{ + Int32 RWLock; + RWLock = PalInterlockedDecrement(&m_RWLock); + ASSERT(RWLock >= 0); +} + + +bool ReaderWriterLock::TryAcquireWriteLock() +{ + Int32 RWLock = PalInterlockedCompareExchange(&m_RWLock, -1, 0); + + ASSERT(RWLock >= 0 || RWLock == -1); + + if (RWLock) + return false; + +#if 0 + m_WriterWaiting = false; +#endif + + return true; +} + +void ReaderWriterLock::AcquireWriteLock() +{ + UInt32 uSwitchCount = 0; + + for (;;) + { + if (TryAcquireWriteLock()) + return; + +#if 0 + // Set the writer waiting word, if not already set, to notify potential readers to wait. + m_WriterWaiting = true; +#endif + + UInt32 uDelay = g_SpinConstants.uInitialDuration; + do + { + if (TryAcquireWriteLock()) + return; + + // Do not spin if GC is in progress because the lock will not + // be released until GC is finished. + if (m_fBlockOnGc && ThreadStore::IsTrapThreadsRequested()) + { + RedhawkGCInterface::WaitForGCCompletion(); + } + + if (g_RhSystemInfo.dwNumberOfProcessors <= 1) + { + break; + } + + // Delay by approximately 2*i clock cycles (Pentium III). + YieldProcessorNormalizedForPreSkylakeCount(uDelay); + + // exponential backoff: wait a factor longer in the next iteration + uDelay *= g_SpinConstants.uBackoffFactor; + } + while (uDelay < g_SpinConstants.uMaximumDuration); + + __SwitchToThread(0, ++uSwitchCount); + } +} + +void ReaderWriterLock::ReleaseWriteLock() +{ + Int32 RWLock; + RWLock = PalInterlockedExchange(&m_RWLock, 0); + ASSERT(RWLock == -1); +} +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/RWLock.h b/src/coreclr/src/nativeaot/Runtime/RWLock.h new file mode 100644 index 0000000000000..79459a86cf525 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RWLock.h @@ -0,0 +1,58 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __RWLock_h__ +#define __RWLock_h__ + +class ReaderWriterLock +{ + volatile Int32 m_RWLock; // lock used for R/W synchronization + Int32 m_spinCount; // spin count for a reader waiting for a writer to release the lock + bool m_fBlockOnGc; // True if the spinning writers should block when GC is in progress + + +#if 0 + // used to prevent writers from being starved by readers + // we currently do not prevent writers from starving readers since writers + // are supposed to be rare. + bool m_WriterWaiting; +#endif + + bool TryAcquireReadLock(); + bool TryAcquireWriteLock(); + +public: + class ReadHolder + { + ReaderWriterLock * m_pLock; + bool m_fLockAcquired; + public: + ReadHolder(ReaderWriterLock * pLock, bool fAcquireLock = true); + ~ReadHolder(); + }; + + class WriteHolder + { + ReaderWriterLock * m_pLock; + bool m_fLockAcquired; + public: + WriteHolder(ReaderWriterLock * pLock, bool fAcquireLock = true); + ~WriteHolder(); + }; + + ReaderWriterLock(bool fBlockOnGc = false); + + void AcquireReadLock(); + void ReleaseReadLock(); + + bool DangerousTryPulseReadLock(); + +protected: + void AcquireWriteLock(); + void ReleaseWriteLock(); + + void AcquireReadLockWorker(); + +}; + +#endif // __RWLock_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/Range.h b/src/coreclr/src/nativeaot/Runtime/Range.h new file mode 100644 index 0000000000000..a728c587c1641 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/Range.h @@ -0,0 +1,137 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#pragma once + +namespace rh { namespace util +{ + //--------------------------------------------------------------------------------------------- + // Represents value range [a,b), and provides various convenience methods. 
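A quick illustration of the half-open [a,b) convention used by the Range class that follows: the end point is excluded, so [0,4) and [4,8) are adjacent but do not intersect. The free function below is a hypothetical mirror of Range::IntersectsWith.

    inline bool ExampleHalfOpenIntersects(UIntNative aStart, UIntNative aEnd,
                                          UIntNative bStart, UIntNative bEnd)
    {
        return (bStart < aEnd) && (bEnd > aStart);
    }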
+ + template + class Range + { + typedef Range THIS_T; + + public: + //----------------------------------------------------------------------------------------- + // Construction + + Range() + : m_start(0), + m_end(0) + {} + + Range(Range const & range) + : m_start(range.m_start), + m_end(range.m_end) + {} + + static Range CreateWithEndpoint(VALUE_TYPE start, + VALUE_TYPE end) + { return Range(start, end); } + + static Range CreateWithLength(VALUE_TYPE start, LENGTH_TYPE len) + { return Range(start, start + len); } + + //----------------------------------------------------------------------------------------- + // Operations + + THIS_T& operator=(THIS_T const & range) + { m_start = range.m_start; m_end = range.m_end; return *this; } + + bool Equals(THIS_T const & range) const + { return GetStart() == range.GetStart() && GetEnd() == range.GetEnd(); } + + bool operator==(THIS_T const & range) const + { return Equals(range); } + + bool operator!=(THIS_T const & range) const + { return !Equals(range); } + + VALUE_TYPE GetStart() const + { return m_start; } + + VALUE_TYPE GetEnd() const + { return m_end; } + + LENGTH_TYPE GetLength() const + { return m_end - m_start; } + + bool IntersectsWith(THIS_T const &range) const + { return range.GetStart() < GetEnd() && range.GetEnd() > GetStart(); } + + bool IntersectsWith(VALUE_TYPE start, + VALUE_TYPE end) const + { return IntersectsWith(THIS_T(start, end)); } + + bool Contains(THIS_T const &range) const + { return GetStart() <= range.GetStart() && range.GetEnd() <= GetEnd(); } + + bool IsAdjacentTo(THIS_T const &range) const + { return GetEnd() == range.GetStart() || range.GetEnd() == GetStart(); } + + protected: + Range(VALUE_TYPE start, VALUE_TYPE end) + : m_start(start), + m_end(end) + { ASSERT(start <= end); } + + VALUE_TYPE m_start; + VALUE_TYPE m_end; + }; + + //--------------------------------------------------------------------------------------------- + // Represents address range [a,b), and provides various convenience methods. + + class MemRange : public Range + { + typedef Range BASE_T; + + public: + //----------------------------------------------------------------------------------------- + // Construction + + MemRange() + : BASE_T() + {} + + MemRange(void* pvMemStart, + UIntNative cbMemLen) + : BASE_T(reinterpret_cast(pvMemStart), reinterpret_cast(pvMemStart) + cbMemLen) + {} + + MemRange(void* pvMemStart, + void* pvMemEnd) + : BASE_T(reinterpret_cast(pvMemStart), reinterpret_cast(pvMemEnd)) + {} + + MemRange(MemRange const & range) + : BASE_T(range) + { } + + //----------------------------------------------------------------------------------------- + // Operations + + MemRange& operator=(MemRange const & range) + { BASE_T::operator=(range); return *this; } + + UIntNative GetPageCount() const + { + UInt8 *pCurPage = ALIGN_DOWN(GetStart(), OS_PAGE_SIZE); + UInt8 *pEndPage = ALIGN_UP(GetEnd(), OS_PAGE_SIZE); + return (pEndPage - pCurPage) / OS_PAGE_SIZE; + } + + UInt8* GetStartPage() const + { return ALIGN_DOWN(GetStart(), OS_PAGE_SIZE); } + + // The page immediately following the last page contained by this range. 
+ UInt8* GetEndPage() const + { return ALIGN_UP(GetEnd(), OS_PAGE_SIZE); } + + MemRange GetPageRange() const + { return MemRange(GetStartPage(), GetEndPage()); } + }; +}// namespace util +}// namespace rh + diff --git a/src/coreclr/src/nativeaot/Runtime/RedhawkWarnings.h b/src/coreclr/src/nativeaot/Runtime/RedhawkWarnings.h new file mode 100644 index 0000000000000..e3cc1118b5d8d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RedhawkWarnings.h @@ -0,0 +1,8 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Disable some commonly ignored warnings +// + +MSVC_DISABLE_WARNING(4200) // nonstandard extension used : zero-sized array in struct/union diff --git a/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.cpp b/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.cpp new file mode 100644 index 0000000000000..b7a8ce134b679 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.cpp @@ -0,0 +1,248 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Callouts from the unmanaged portion of the runtime to C# helpers made during garbage collections. See +// RestrictedCallouts.h for more detail. +// + +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "gcrhinterface.h" +#include "shash.h" +#include "RWLock.h" +#include "rhbinder.h" +#include "Crst.h" +#include "RuntimeInstance.h" +#include "eetype.h" +#include "ObjectLayout.h" +#include "event.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "RestrictedCallouts.h" + +// The head of the chains of GC callouts, one per callout type. +RestrictedCallouts::GcRestrictedCallout * RestrictedCallouts::s_rgGcRestrictedCallouts[GCRC_Count] = { 0 }; + +// The head of the chain of HandleTable callouts. +RestrictedCallouts::HandleTableRestrictedCallout * RestrictedCallouts::s_pHandleTableRestrictedCallouts = NULL; + +// Lock protecting access to s_rgGcRestrictedCallouts and s_pHandleTableRestrictedCallouts during registration +// and unregistration (not used during actual callbacks since everything is single threaded then). +CrstStatic RestrictedCallouts::s_sLock; + +// One time startup initialization. +bool RestrictedCallouts::Initialize() +{ + s_sLock.Init(CrstRestrictedCallouts, CRST_DEFAULT); + + return true; +} + +// Register callback of the given type to the method with the given address. The most recently registered +// callbacks are called first. Returns true on success, false if insufficient memory was available for the +// registration. +bool RestrictedCallouts::RegisterGcCallout(GcRestrictedCalloutKind eKind, void * pCalloutMethod) +{ + // Validate callout kind. + if (eKind >= GCRC_Count) + { + ASSERT_UNCONDITIONALLY("Invalid GC restricted callout kind."); + RhFailFast(); + } + + GcRestrictedCallout * pCallout = new (nothrow) GcRestrictedCallout(); + if (pCallout == NULL) + return false; + + pCallout->m_pCalloutMethod = pCalloutMethod; + + CrstHolder lh(&s_sLock); + + // Link new callout to head of the chain according to its type. 
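+    // Head insertion is what gives the most-recently-registered-first invocation
+    // order promised in the comment on RegisterGcCallout above.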
+ pCallout->m_pNext = s_rgGcRestrictedCallouts[eKind]; + s_rgGcRestrictedCallouts[eKind] = pCallout; + + return true; +} + +// Unregister a previously registered callout. Removes the first registration that matches on both callout +// kind and address. Causes a fail fast if the registration doesn't exist. +void RestrictedCallouts::UnregisterGcCallout(GcRestrictedCalloutKind eKind, void * pCalloutMethod) +{ + // Validate callout kind. + if (eKind >= GCRC_Count) + { + ASSERT_UNCONDITIONALLY("Invalid GC restricted callout kind."); + RhFailFast(); + } + + CrstHolder lh(&s_sLock); + + GcRestrictedCallout * pCurrCallout = s_rgGcRestrictedCallouts[eKind]; + GcRestrictedCallout * pPrevCallout = NULL; + + while (pCurrCallout) + { + if (pCurrCallout->m_pCalloutMethod == pCalloutMethod) + { + // Found a matching entry, remove it from the chain. + if (pPrevCallout) + pPrevCallout->m_pNext = pCurrCallout->m_pNext; + else + s_rgGcRestrictedCallouts[eKind] = pCurrCallout->m_pNext; + + delete pCurrCallout; + + return; + } + + pPrevCallout = pCurrCallout; + pCurrCallout = pCurrCallout->m_pNext; + } + + // If we get here we didn't find a matching registration, indicating a bug on the part of the caller. + ASSERT_UNCONDITIONALLY("Attempted to unregister restricted callout that wasn't registered."); + RhFailFast(); +} + +// Register callback for the "is alive" property of ref counted handles with objects of the given type (the +// type match must be exact). The most recently registered callbacks are called first. Returns true on +// success, false if insufficient memory was available for the registration. +bool RestrictedCallouts::RegisterRefCountedHandleCallback(void * pCalloutMethod, EEType * pTypeFilter) +{ + HandleTableRestrictedCallout * pCallout = new (nothrow) HandleTableRestrictedCallout(); + if (pCallout == NULL) + return false; + + pCallout->m_pCalloutMethod = pCalloutMethod; + pCallout->m_pTypeFilter = pTypeFilter; + + CrstHolder lh(&s_sLock); + + // Link new callout to head of the chain. + pCallout->m_pNext = s_pHandleTableRestrictedCallouts; + s_pHandleTableRestrictedCallouts = pCallout; + + return true; +} + +// Unregister a previously registered callout. Removes the first registration that matches on both callout +// address and filter type. Causes a fail fast if the registration doesn't exist. +void RestrictedCallouts::UnregisterRefCountedHandleCallback(void * pCalloutMethod, EEType * pTypeFilter) +{ + CrstHolder lh(&s_sLock); + + HandleTableRestrictedCallout * pCurrCallout = s_pHandleTableRestrictedCallouts; + HandleTableRestrictedCallout * pPrevCallout = NULL; + + while (pCurrCallout) + { + if ((pCurrCallout->m_pCalloutMethod == pCalloutMethod) && + (pCurrCallout->m_pTypeFilter == pTypeFilter)) + { + // Found a matching entry, remove it from the chain. + if (pPrevCallout) + pPrevCallout->m_pNext = pCurrCallout->m_pNext; + else + s_pHandleTableRestrictedCallouts = pCurrCallout->m_pNext; + + delete pCurrCallout; + + return; + } + + pPrevCallout = pCurrCallout; + pCurrCallout = pCurrCallout->m_pNext; + } + + // If we get here we didn't find a matching registration, indicating a bug on the part of the caller. + ASSERT_UNCONDITIONALLY("Attempted to unregister restricted callout that wasn't registered."); + RhFailFast(); +} + +// Invoke all the registered GC callouts of the given kind. The condemned generation of the current collection +// is passed along to the callouts. 
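A hedged sketch of how a callout is registered against the invocation path below. The callback and the wiring function are hypothetical; in the runtime the classlib supplies the callout address.

    // The callback must obey the restrictions listed in RestrictedCallouts.h:
    // no allocation, no exceptions, nothing that can block on the GC.
    void __fastcall ExampleOnGcStart(UInt32 uiCondemnedGeneration)
    {
        UNREFERENCED_PARAMETER(uiCondemnedGeneration);
        // lightweight bookkeeping only
    }

    bool ExampleWireUpStartCollectionCallout()
    {
        // Returns false if the bookkeeping node could not be allocated.
        return RestrictedCallouts::RegisterGcCallout(GCRC_StartCollection, (void *)&ExampleOnGcStart);
    }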
+void RestrictedCallouts::InvokeGcCallouts(GcRestrictedCalloutKind eKind, UInt32 uiCondemnedGeneration) +{ + ASSERT(eKind < GCRC_Count); + + // It is illegal for any of the callouts to trigger a GC. + Thread * pThread = ThreadStore::GetCurrentThread(); + pThread->SetDoNotTriggerGc(); + + // Due to the above we have better suppress GC stress. + bool fGcStressWasSuppressed = pThread->IsSuppressGcStressSet(); + if (!fGcStressWasSuppressed) + pThread->SetSuppressGcStress(); + + GcRestrictedCallout * pCurrCallout = s_rgGcRestrictedCallouts[eKind]; + while (pCurrCallout) + { + // Make the callout. + ((GcRestrictedCallbackFunction)pCurrCallout->m_pCalloutMethod)(uiCondemnedGeneration); + + pCurrCallout = pCurrCallout->m_pNext; + } + + // Revert GC stress mode if we changed it. + if (!fGcStressWasSuppressed) + pThread->ClearSuppressGcStress(); + + pThread->ClearDoNotTriggerGc(); +} + +// Invoke all the registered ref counted handle callouts for the given object extracted from the handle. The +// result is the union of the results for all the handlers that matched the object type (i.e. if one of them +// returned true the overall result is true otherwise false is returned (which includes the case where no +// handlers matched)). Since there should be no other side-effects of the callout, the invocations cease as +// soon as a handler returns true. +bool RestrictedCallouts::InvokeRefCountedHandleCallbacks(Object * pObject) +{ + bool fResult = false; + + // It is illegal for any of the callouts to trigger a GC. + Thread * pThread = ThreadStore::GetCurrentThread(); + pThread->SetDoNotTriggerGc(); + + // Due to the above we have better suppress GC stress. + bool fGcStressWasSuppressed = pThread->IsSuppressGcStressSet(); + if (!fGcStressWasSuppressed) + pThread->SetSuppressGcStress(); + + HandleTableRestrictedCallout * pCurrCallout = s_pHandleTableRestrictedCallouts; + while (pCurrCallout) + { + if (pObject->get_SafeEEType() == pCurrCallout->m_pTypeFilter) + { + // Make the callout. Return true to our caller as soon as we see a true result here. + if (((HandleTableRestrictedCallbackFunction)pCurrCallout->m_pCalloutMethod)(pObject)) + { + fResult = true; + goto Done; + } + } + + pCurrCallout = pCurrCallout->m_pNext; + } + + Done: + // Revert GC stress mode if we changed it. + if (!fGcStressWasSuppressed) + pThread->ClearSuppressGcStress(); + + pThread->ClearDoNotTriggerGc(); + + return fResult; +} diff --git a/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.h b/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.h new file mode 100644 index 0000000000000..384e2fcd9c8a5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RestrictedCallouts.h @@ -0,0 +1,102 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Restricted callouts refer to calls to classlib defined code written in C# made from the runtime during a +// garbage collection. As such these C# methods are constrained in what they can do and must be written very +// carefully. The most obvious restriction is that they cannot trigger a GC (by attempting to allocate memory +// for example) since that would lead to an immediate deadlock. +// +// Known constraints: +// * No triggering of GCs (new, boxing value types, foreach over a type that allocates for its IEnumerator, +// calling GC.Collect etc.). +// * No exceptions can leak out of the callout. 
+// * No blocking (or expensive) operations that could starve the GC or potentially lead to deadlocks. +// * No use of runtime facilities that check whether a GC is in progress, these will deadlock. The big +// example we know about so far is making a p/invoke call. +// * For the AfterMarkPhase callout special attention must be paid to avoid any action that reads the EEType* +// from an object header (e.g. casting). At this point the GC may have mark bits set in the the pointer. +// + +class EEType; + +// Enum for the various GC callouts available. The values and their meanings are a contract with the classlib +// so be careful altering these. +enum GcRestrictedCalloutKind +{ + GCRC_StartCollection = 0, // Collection is about to begin + GCRC_EndCollection = 1, // Collection has completed + GCRC_AfterMarkPhase = 2, // All live objects are marked (not including ready for finalization + // objects), no handles have been cleared + GCRC_Count // Maximum number of callout types +}; + +class RestrictedCallouts +{ +public: + // One time startup initialization. + static bool Initialize(); + + // Register callback of the given type to the method with the given address. The most recently registered + // callbacks are called first. Returns true on success, false if insufficient memory was available for the + // registration. + static bool RegisterGcCallout(GcRestrictedCalloutKind eKind, void * pCalloutMethod); + + // Unregister a previously registered callout. Removes the first registration that matches on both callout + // kind and address. Causes a fail fast if the registration doesn't exist. + static void UnregisterGcCallout(GcRestrictedCalloutKind eKind, void * pCalloutMethod); + + // Register callback for the "is alive" property of ref counted handles with objects of the given type + // (the type match must be exact). The most recently registered callbacks are called first. Returns true + // on success, false if insufficient memory was available for the registration. + static bool RegisterRefCountedHandleCallback(void * pCalloutMethod, EEType * pTypeFilter); + + // Unregister a previously registered callout. Removes the first registration that matches on both callout + // address and filter type. Causes a fail fast if the registration doesn't exist. + static void UnregisterRefCountedHandleCallback(void * pCalloutMethod, EEType * pTypeFilter); + + // Invoke all the registered GC callouts of the given kind. The condemned generation of the current + // collection is passed along to the callouts. + static void InvokeGcCallouts(GcRestrictedCalloutKind eKind, UInt32 uiCondemnedGeneration); + + // Invoke all the registered ref counted handle callouts for the given object extracted from the handle. + // The result is the union of the results for all the handlers that matched the object type (i.e. if one + // of them returned true the overall result is true otherwise false is returned (which includes the case + // where no handlers matched)). Since there should be no other side-effects of the callout, the + // invocations cease as soon as a handler returns true. + static bool InvokeRefCountedHandleCallbacks(Object * pObject); + +private: + // Context struct used to record which GC callbacks are registered to be made (we allow multiple + // registrations). + struct GcRestrictedCallout + { + GcRestrictedCallout * m_pNext; // Next callout to make or NULL + void * m_pCalloutMethod; // Address of code to call + }; + + // The head of the chains of GC callouts, one per callout type. 
+ static GcRestrictedCallout * s_rgGcRestrictedCallouts[GCRC_Count]; + + // The handle table only has one callout type, for ref-counted handles. But it allows the client to + // specify a type filter: i.e. only handles with an object of the exact type specified will have the + // callout invoked. + struct HandleTableRestrictedCallout + { + HandleTableRestrictedCallout * m_pNext; // Next callout to make or NULL + void * m_pCalloutMethod; // Address of code to call + EEType * m_pTypeFilter; // Type of object for which callout will be made + }; + + // The head of the chain of HandleTable callouts. + static HandleTableRestrictedCallout * s_pHandleTableRestrictedCallouts; + + // Lock protecting access to s_rgGcRestrictedCallouts and s_pHandleTableRestrictedCallouts during + // registration and unregistration (not used during actual callbacks since everything is single threaded + // then). + static CrstStatic s_sLock; + + // Prototypes for the callouts. + typedef void (__fastcall * GcRestrictedCallbackFunction)(UInt32 uiCondemnedGeneration); + typedef Boolean (__fastcall * HandleTableRestrictedCallbackFunction)(Object * pObject); +}; diff --git a/src/coreclr/src/nativeaot/Runtime/RhConfig.cpp b/src/coreclr/src/nativeaot/Runtime/RhConfig.cpp new file mode 100644 index 0000000000000..47398b7b6d926 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RhConfig.cpp @@ -0,0 +1,421 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#ifndef DACCESS_COMPILE +#include "CommonTypes.h" +#include "daccess.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "event.h" +#include "RWLock.h" +#include "threadstore.h" +#include "RuntimeInstance.h" +#include "shash.h" +#include "RhConfig.h" + +#include + +UInt32 RhConfig::ReadConfigValue(_In_z_ const TCHAR *wszName, UInt32 uiDefaultValue) +{ + TCHAR wszBuffer[CONFIG_VAL_MAXLEN + 1]; // 8 hex digits plus a nul terminator. 
+ const UInt32 cchBuffer = sizeof(wszBuffer) / sizeof(wszBuffer[0]); + + UInt32 cchResult = 0; + +#ifdef FEATURE_ENVIRONMENT_VARIABLE_CONFIG + cchResult = PalGetEnvironmentVariable(wszName, wszBuffer, cchBuffer); +#endif // FEATURE_ENVIRONMENT_VARIABLE_CONFIG + + //if the config key wasn't found in the environment + if ((cchResult == 0) || (cchResult >= cchBuffer)) + cchResult = GetIniVariable(wszName, wszBuffer, cchBuffer); + +#ifdef FEATURE_EMBEDDED_CONFIG + // if the config key wasn't found in the ini file + if ((cchResult == 0) || (cchResult >= cchBuffer)) + cchResult = GetEmbeddedVariable(wszName, wszBuffer, cchBuffer); +#endif // FEATURE_EMBEDDED_CONFIG + + if ((cchResult == 0) || (cchResult >= cchBuffer)) + return uiDefaultValue; // not found, return default + + UInt32 uiResult = 0; + + for (UInt32 i = 0; i < cchResult; i++) + { + uiResult <<= 4; + + TCHAR ch = wszBuffer[i]; + if ((ch >= _T('0')) && (ch <= _T('9'))) + uiResult += ch - _T('0'); + else if ((ch >= _T('a')) && (ch <= _T('f'))) + uiResult += (ch - _T('a')) + 10; + else if ((ch >= _T('A')) && (ch <= _T('F'))) + uiResult += (ch - _T('A')) + 10; + else + return uiDefaultValue; // parse error, return default + } + + return uiResult; +} + +//reads a config value from rhconfig.ini into outputBuffer buffer returning the length of the value. +//lazily reads the file so if the file is not yet read, it will read it on first called +//if the file is not avaliable, or unreadable zero will always be returned +//cchOutputBuffer is the maximum number of characters to write to outputBuffer +//cchOutputBuffer must be a size >= CONFIG_VAL_MAXLEN + 1 +UInt32 RhConfig::GetIniVariable(_In_z_ const TCHAR* configName, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer) +{ + //the buffer needs to be big enough to read the value buffer + null terminator + if (cchOutputBuffer < CONFIG_VAL_MAXLEN + 1) + { + return 0; + } + + //if we haven't read the config yet try to read + if (g_iniSettings == NULL) + { + ReadConfigIni(); + } + + //if the config wasn't read or reading failed return 0 immediately + if (g_iniSettings == CONFIG_INI_NOT_AVAIL) + { + return 0; + } + + return GetConfigVariable(configName, (ConfigPair*)g_iniSettings, outputBuffer, cchOutputBuffer); +} + +#ifdef FEATURE_EMBEDDED_CONFIG +UInt32 RhConfig::GetEmbeddedVariable(_In_z_ const TCHAR* configName, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer) +{ + //the buffer needs to be big enough to read the value buffer + null terminator + if (cchOutputBuffer < CONFIG_VAL_MAXLEN + 1) + { + return 0; + } + + //if we haven't read the config yet try to read + if (g_embeddedSettings == NULL) + { + ReadEmbeddedSettings(); + } + + //if the config wasn't read or reading failed return 0 immediately + if (g_embeddedSettings == CONFIG_INI_NOT_AVAIL) + { + return 0; + } + + return GetConfigVariable(configName, (ConfigPair*)g_embeddedSettings, outputBuffer, cchOutputBuffer); +} +#endif // FEATURE_EMBEDDED_CONFIG + +UInt32 RhConfig::GetConfigVariable(_In_z_ const TCHAR* configName, const ConfigPair* configPairs, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer) +{ + //find the first name which matches (case insensitive to be compat with environment variable counterpart) + for (int iSettings = 0; iSettings < RCV_Count; iSettings++) + { + if (_tcsicmp(configName, configPairs[iSettings].Key) == 0) + { + bool nullTerm = FALSE; + + UInt32 iValue; + + for (iValue = 0; (iValue < CONFIG_VAL_MAXLEN + 1) && 
(iValue < (Int32)cchOutputBuffer); iValue++) + { + outputBuffer[iValue] = configPairs[iSettings].Value[iValue]; + + if (outputBuffer[iValue] == '\0') + { + nullTerm = true; + break; + } + } + + //return the length of the config value if null terminated else return zero + return nullTerm ? iValue : 0; + } + } + + //if the config key was not found return 0 + return 0; +} + +//reads the configuration values from rhconfig.ini and updates g_iniSettings +//if the file is read succesfully and g_iniSettings will be set to a valid ConfigPair[] of length RCV_Count. +//if the file does not exist or reading the file fails, g_iniSettings is set to CONFIG_INI_NOT_AVAIL +//NOTE: all return paths must set g_iniSettings +void RhConfig::ReadConfigIni() +{ + if (g_iniSettings == NULL) + { + TCHAR* configPath = GetConfigPath(); + + //if we couldn't determine the path to the config set g_iniSettings to CONGIF_NOT_AVAIL + if (configPath == NULL) + { + //only set if another thread hasn't initialized the buffer yet, otherwise ignore and let the first setter win + PalInterlockedCompareExchangePointer(&g_iniSettings, CONFIG_INI_NOT_AVAIL, NULL); + + return; + } + + //buffer is max file size + 1 for null terminator if needed + char buff[CONFIG_FILE_MAXLEN + 1]; + + //if the file read failed or the file is bigger than the specified buffer this will return zero + UInt32 fSize = PalReadFileContents(configPath, buff, CONFIG_FILE_MAXLEN); + + //ensure the buffer is null terminated + buff[fSize] = '\0'; + + //delete the configPath + delete[] configPath; + + //if reading the file contents failed set g_iniSettings to CONFIG_INI_NOT_AVAIL + if (fSize == 0) + { + //only set if another thread hasn't initialized the buffer yet, otherwise ignore and let the first setter win + PalInterlockedCompareExchangePointer(&g_iniSettings, CONFIG_INI_NOT_AVAIL, NULL); + + return; + } + + ConfigPair* iniBuff = new (nothrow) ConfigPair[RCV_Count]; + if (iniBuff == NULL) + { + //only set if another thread hasn't initialized the buffer yet, otherwise ignore and let the first setter win + PalInterlockedCompareExchangePointer(&g_iniSettings, CONFIG_INI_NOT_AVAIL, NULL); + + return; + } + + UInt32 iBuff = 0; + UInt32 iIniBuff = 0; + char* currLine; + + //while we haven't reached the max number of config pairs, or the end of the file, read the next line + while (iIniBuff < RCV_Count && iBuff < fSize) + { + //'trim' the leading whitespace + while (priv_isspace(buff[iBuff]) && (iBuff < fSize)) + iBuff++; + + currLine = &buff[iBuff]; + + //find the end of the line + while ((buff[iBuff] != '\n') && (buff[iBuff] != '\r') && (iBuff < fSize)) + iBuff++; + + //null terminate the line + buff[iBuff] = '\0'; + + //parse the line + //only increment iIniBuff if the parsing succeeded otherwise reuse the config struct + if (ParseConfigLine(&iniBuff[iIniBuff], currLine)) + { + iIniBuff++; + } + + //advance to the next line; + iBuff++; + } + + //initialize the remaining config pairs to "\0" + while (iIniBuff < RCV_Count) + { + iniBuff[iIniBuff].Key[0] = '\0'; + iniBuff[iIniBuff].Value[0] = '\0'; + iIniBuff++; + } + + //if another thread initialized first let the first setter win + //delete the iniBuff to avoid leaking memory + if (PalInterlockedCompareExchangePointer(&g_iniSettings, iniBuff, NULL) != NULL) + { + delete[] iniBuff; + } + } + + return; +} + +#ifdef FEATURE_EMBEDDED_CONFIG +struct CompilerEmbeddedSettingsBlob +{ + UInt32 Size; + char Data[1]; +}; + +extern "C" CompilerEmbeddedSettingsBlob g_compilerEmbeddedSettingsBlob; + +void 
RhConfig::ReadEmbeddedSettings() +{ + if (g_embeddedSettings == NULL) + { + //if reading the file contents failed set g_embeddedSettings to CONFIG_INI_NOT_AVAIL + if (g_compilerEmbeddedSettingsBlob.Size == 0) + { + //only set if another thread hasn't initialized the buffer yet, otherwise ignore and let the first setter win + PalInterlockedCompareExchangePointer(&g_embeddedSettings, CONFIG_INI_NOT_AVAIL, NULL); + + return; + } + + ConfigPair* iniBuff = new (nothrow) ConfigPair[RCV_Count]; + if (iniBuff == NULL) + { + //only set if another thread hasn't initialized the buffer yet, otherwise ignore and let the first setter win + PalInterlockedCompareExchangePointer(&g_embeddedSettings, CONFIG_INI_NOT_AVAIL, NULL); + + return; + } + + UInt32 iBuff = 0; + UInt32 iIniBuff = 0; + char* currLine; + + //while we haven't reached the max number of config pairs, or the end of the file, read the next line + while (iIniBuff < RCV_Count && iBuff < g_compilerEmbeddedSettingsBlob.Size) + { + currLine = &g_compilerEmbeddedSettingsBlob.Data[iBuff]; + + //find the end of the line + while ((g_compilerEmbeddedSettingsBlob.Data[iBuff] != '\0') && (iBuff < g_compilerEmbeddedSettingsBlob.Size)) + iBuff++; + + //parse the line + //only increment iIniBuff if the parsing succeeded otherwise reuse the config struct + if (ParseConfigLine(&iniBuff[iIniBuff], currLine)) + { + iIniBuff++; + } + + //advance to the next line; + iBuff++; + } + + //initialize the remaining config pairs to "\0" + while (iIniBuff < RCV_Count) + { + iniBuff[iIniBuff].Key[0] = '\0'; + iniBuff[iIniBuff].Value[0] = '\0'; + iIniBuff++; + } + + //if another thread initialized first let the first setter win + //delete the iniBuff to avoid leaking memory + if (PalInterlockedCompareExchangePointer(&g_embeddedSettings, iniBuff, NULL) != NULL) + { + delete[] iniBuff; + } + } + + return; +} +#endif // FEATURE_EMBEDDED_CONFIG + +//returns the path to the runtime configuration ini +_Ret_maybenull_z_ TCHAR* RhConfig::GetConfigPath() +{ + const TCHAR* exePathBuff; + + //get the path to rhconfig.ini, this file is expected to live along side the app + //to build the path get the process executable module full path strip off the file name and + //append rhconfig.ini + Int32 pathLen = PalGetModuleFileName(&exePathBuff, NULL); + + if (pathLen <= 0) + { + return NULL; + } + UInt32 iLastDirSeparator = 0; + + for (UInt32 iPath = pathLen - 1; iPath > 0; iPath--) + { + if (exePathBuff[iPath] == DIRECTORY_SEPARATOR_CHAR) + { + iLastDirSeparator = iPath; + break; + } + } + + if (iLastDirSeparator == 0) + { + return NULL; + } + + TCHAR* configPath = new (nothrow) TCHAR[iLastDirSeparator + 1 + wcslen(CONFIG_INI_FILENAME) + 1]; + if (configPath != NULL) + { + //copy the path base and file name + for (UInt32 i = 0; i <= iLastDirSeparator; i++) + { + configPath[i] = exePathBuff[i]; + } + + for (UInt32 i = 0; i <= wcslen(CONFIG_INI_FILENAME); i++) + { + configPath[i + iLastDirSeparator + 1] = CONFIG_INI_FILENAME[i]; + } + } + + return configPath; +} + +//Parses one line of rhconfig.ini and populates values in the passed in configPair +//returns: true if the parsing was successful, false if the parsing failed. 
+//NOTE: if the method fails configPair is left in an unitialized state +bool RhConfig::ParseConfigLine(_Out_ ConfigPair* configPair, _In_z_ const char * line) +{ + UInt32 iLine = 0; + UInt32 iKey = 0; + UInt32 iVal = 0; + + //while we haven't reached the end of the key signalled by '=', or the end of the line, or the key maxlen + while (line[iLine] != '=' && line[iLine] != '\0' && iKey < CONFIG_KEY_MAXLEN) + { + configPair->Key[iKey++] = line[iLine++]; + } + + //if the current char is not '=' we reached the key maxlen, or the line ended return false + if (line[iLine] != '=') + { + return FALSE; + } + + configPair->Key[iKey] = '\0'; + + //increment to start of the value + iLine++; + + //while we haven't reached the end of the line, or val maxlen + while (line[iLine] != '\0' && iVal < CONFIG_VAL_MAXLEN) + { + configPair->Value[iVal++] = line[iLine++]; + } + + //if the current char is not '\0' we didn't reach the end of the line return false + if (line[iLine] != '\0') + { + return FALSE; + } + + configPair->Value[iVal] = '\0'; + + return TRUE; +} + +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/RhConfig.h b/src/coreclr/src/nativeaot/Runtime/RhConfig.h new file mode 100644 index 0000000000000..b6a50eb9b56cd --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RhConfig.h @@ -0,0 +1,151 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Provides simple configuration support through environment variables. Each variable is lazily inspected on +// first query and the resulting value cached for future use. To keep things simple we support reading only +// 32-bit hex quantities and a zero value is considered equivalent to the environment variable not being +// defined. We can get more sophisticated if needs be, but the hope is that very few configuration values are +// exposed in this manner. +// +// Values can also be configured through an rhconfig.ini file. The file must be and ASCII text file, must be +// placed next to the executing assembly, and be named rhconfig.ini. The file consists of one config entry per line +// in the format: = +// example: +// RH_HeapVerify=1 +// RH_BreakOnAssert=1 +// + + +#ifndef DACCESS_COMPILE + +#if defined(_DEBUG) || !defined(APP_LOCAL_RUNTIME) +#define FEATURE_ENVIRONMENT_VARIABLE_CONFIG +#endif + +class RhConfig +{ + +#define CONFIG_INI_FILENAME L"rhconfig.ini" +#define CONFIG_INI_NOT_AVAIL (void*)0x1 //signal for ini file failed to load +#define CONFIG_KEY_MAXLEN 50 //arbitrary max length of config keys increase if needed +#define CONFIG_VAL_MAXLEN 8 //32 bit uint in hex + +private: + struct ConfigPair + { + public: + TCHAR Key[CONFIG_KEY_MAXLEN + 1]; //maxlen + null terminator + TCHAR Value[CONFIG_VAL_MAXLEN + 1]; //maxlen + null terminator + }; + + //g_iniSettings is a buffer of ConfigPair structs which when initialized is of length RCV_Count + //the first N settings which are set in rhconfig.ini will be initialized and the remainder with have + //empty string "\0" as a Key and Value + // + //if the buffer has not been initialized (ie the ini file has not been read) the value will be NULL + //if we already attempted to initialize the file and could not find or read the contents the + //value will be CONFIG_INI_NOT_AVAIL to distinguish from the unitialized buffer. 
+ // + //NOTE: g_iniSettings is only set in ReadConfigIni and must be set atomically only once + // using PalInterlockedCompareExchangePointer to avoid races when initializing +private: + void* volatile g_iniSettings = NULL; + +#ifdef FEATURE_EMBEDDED_CONFIG + // g_embeddedSettings works similarly to g_iniSettings, except the source of the data + // is a data blob generated by the compiler and embedded into the executable. + void* volatile g_embeddedSettings = NULL; +#endif // FEATURE_EMBEDDED_CONFIG + +public: + +#define DEFINE_VALUE_ACCESSOR(_name, defaultVal) \ + UInt32 Get##_name() \ + { \ + if (m_uiConfigValuesRead & (1 << RCV_##_name)) \ + return m_uiConfigValues[RCV_##_name]; \ + UInt32 uiValue = ReadConfigValue(_T("RH_") _T(#_name), defaultVal); \ + m_uiConfigValues[RCV_##_name] = uiValue; \ + m_uiConfigValuesRead |= 1 << RCV_##_name; \ + return uiValue; \ + } + + +#ifdef _DEBUG +#define DEBUG_CONFIG_VALUE(_name) DEFINE_VALUE_ACCESSOR(_name, 0) +#define DEBUG_CONFIG_VALUE_WITH_DEFAULT(_name, defaultVal) DEFINE_VALUE_ACCESSOR(_name, defaultVal) +#else +#define DEBUG_CONFIG_VALUE(_name) +#define DEBUG_CONFIG_VALUE_WITH_DEFAULT(_name, defaultVal) +#endif +#define RETAIL_CONFIG_VALUE(_name) DEFINE_VALUE_ACCESSOR(_name, 0) +#define RETAIL_CONFIG_VALUE_WITH_DEFAULT(_name, defaultVal) DEFINE_VALUE_ACCESSOR(_name, defaultVal) +#include "RhConfigValues.h" +#undef DEBUG_CONFIG_VALUE +#undef RETAIL_CONFIG_VALUE +#undef DEBUG_CONFIG_VALUE_WITH_DEFAULT +#undef RETAIL_CONFIG_VALUE_WITH_DEFAULT + +private: + + UInt32 ReadConfigValue(_In_z_ const TCHAR *wszName, UInt32 uiDefault); + + enum RhConfigValue + { +#define DEBUG_CONFIG_VALUE(_name) RCV_##_name, +#define RETAIL_CONFIG_VALUE(_name) RCV_##_name, +#define DEBUG_CONFIG_VALUE_WITH_DEFAULT(_name, defaultVal) RCV_##_name, +#define RETAIL_CONFIG_VALUE_WITH_DEFAULT(_name, defaultVal) RCV_##_name, +#include "RhConfigValues.h" +#undef DEBUG_CONFIG_VALUE +#undef RETAIL_CONFIG_VALUE +#undef DEBUG_CONFIG_VALUE_WITH_DEFAULT +#undef RETAIL_CONFIG_VALUE_WITH_DEFAULT + RCV_Count + }; + +//accomidate for the maximum number of config values plus sizable buffer for whitespace 2K +#define CONFIG_FILE_MAXLEN RCV_Count * sizeof(ConfigPair) + 2000 + +private: + _Ret_maybenull_z_ TCHAR* GetConfigPath(); + + //Parses one line of rhconfig.ini and populates values in the passed in configPair + //returns: true if the parsing was successful, false if the parsing failed. + //NOTE: if the method fails configPair is left in an unitialized state + bool ParseConfigLine(_Out_ ConfigPair* configPair, _In_z_ const char * line); + + //reads the configuration values from rhconfig.ini and updates g_iniSettings + //if the file is read succesfully and g_iniSettings will be set to a valid ConfigPair[] of length RCV_Count. + //if the file does not exist or reading the file fails, g_iniSettings is set to CONFIG_INI_NOT_AVAIL + //NOTE: all return paths must set g_iniSettings + void ReadConfigIni(); + + //reads a config value from rhconfig.ini into outputBuffer buffer returning the length of the value. 
+ //lazily reads the file so if the file is not yet read, it will read it on first called + //if the file is not avaliable, or unreadable zero will always be returned + //cchOutputBuffer is the maximum number of characters to write to outputBuffer + UInt32 GetIniVariable(_In_z_ const TCHAR* configName, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer); + +#ifdef FEATURE_EMBEDDED_CONFIG + void ReadEmbeddedSettings(); + + UInt32 GetEmbeddedVariable(_In_z_ const TCHAR* configName, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer); +#endif // FEATURE_EMBEDDED_CONFIG + + UInt32 GetConfigVariable(_In_z_ const TCHAR* configName, const ConfigPair* configPairs, _Out_writes_all_(cchOutputBuffer) TCHAR* outputBuffer, _In_ UInt32 cchOutputBuffer); + + static bool priv_isspace(char c) + { + return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'); + } + + + UInt32 m_uiConfigValuesRead; + UInt32 m_uiConfigValues[RCV_Count]; +}; + +extern RhConfig * g_pRhConfig; + +#endif //!DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/RhConfigValues.h b/src/coreclr/src/nativeaot/Runtime/RhConfigValues.h new file mode 100644 index 0000000000000..1c68fe5963def --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RhConfigValues.h @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Definitions of each configuration value used by the RhConfig class. +// +// Each variable is lazily inspected on first query and the resulting value cached for future use. To keep +// things simple we support reading only 32-bit hex quantities and a zero value is considered equivalent to +// the environment variable not being defined. We can get more sophisticated if needs be, but the hope is that +// very few configuration values are exposed in this manner. +// + +// By default, print assert to console and break in the debugger, if attached. Set to 0 for a pop-up dialog on assert. +DEBUG_CONFIG_VALUE_WITH_DEFAULT(BreakOnAssert, 1) + +RETAIL_CONFIG_VALUE(HeapVerify) +RETAIL_CONFIG_VALUE(StressLogLevel) +RETAIL_CONFIG_VALUE(TotalStressLogSize) +RETAIL_CONFIG_VALUE(DisableBGC) +RETAIL_CONFIG_VALUE(UseServerGC) +DEBUG_CONFIG_VALUE(DisallowRuntimeServicesFallback) +DEBUG_CONFIG_VALUE(GcStressThrottleMode) // gcstm_TriggerAlways / gcstm_TriggerOnFirstHit / gcstm_TriggerRandom +DEBUG_CONFIG_VALUE(GcStressFreqCallsite) // Number of times to force GC out of GcStressFreqDenom (for GCSTM_RANDOM) +DEBUG_CONFIG_VALUE(GcStressFreqLoop) // Number of times to force GC out of GcStressFreqDenom (for GCSTM_RANDOM) +DEBUG_CONFIG_VALUE(GcStressFreqDenom) // Denominator defining frequencies above, 10,000 used when left unspecified (for GCSTM_RANDOM) +DEBUG_CONFIG_VALUE(GcStressSeed) // Specify Seed for random generator (for GCSTM_RANDOM) diff --git a/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.cpp b/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.cpp new file mode 100644 index 0000000000000..487f332fddac7 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.cpp @@ -0,0 +1,543 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
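
To make the X-macro machinery above concrete before moving into RuntimeInstance.cpp: expanding RETAIL_CONFIG_VALUE(HeapVerify) from RhConfigValues.h through DEFINE_VALUE_ACCESSOR in RhConfig.h produces approximately the member shown below (expansion written out for illustration only; the exact tokens come from the preprocessor).

    // Approximate expansion inside class RhConfig:
    UInt32 GetHeapVerify()
    {
        if (m_uiConfigValuesRead & (1 << RCV_HeapVerify))
            return m_uiConfigValues[RCV_HeapVerify];                      // cached from an earlier query
        UInt32 uiValue = ReadConfigValue(_T("RH_") _T("HeapVerify"), 0);  // env var / rhconfig.ini, hex
        m_uiConfigValues[RCV_HeapVerify] = uiValue;
        m_uiConfigValuesRead |= 1 << RCV_HeapVerify;
        return uiValue;
    }

A caller would then query the value through the global declared in RhConfig.h, for example g_pRhConfig->GetHeapVerify(); the value is read once and cached in m_uiConfigValues for the lifetime of the RhConfig instance.
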
+#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "rhbinder.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "event.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "gcrhinterface.h" +#include "shash.h" +#include "TypeManager.h" +#include "eetype.h" +#include "varint.h" +#include "DebugEventSource.h" + +#include "CommonMacros.inl" +#include "slist.inl" +#include "eetype.inl" + +#ifdef FEATURE_GC_STRESS +enum HijackType { htLoop, htCallsite }; +bool ShouldHijackForGcStress(UIntNative CallsiteIP, HijackType ht); +#endif // FEATURE_GC_STRESS + +#include "shash.inl" + +#ifndef DACCESS_COMPILE +COOP_PINVOKE_HELPER(UInt8 *, RhSetErrorInfoBuffer, (UInt8 * pNewBuffer)) +{ + return (UInt8 *) PalSetWerDataBuffer(pNewBuffer); +} +#endif // DACCESS_COMPILE + + +ThreadStore * RuntimeInstance::GetThreadStore() +{ + return m_pThreadStore; +} + +COOP_PINVOKE_HELPER(UInt8 *, RhFindMethodStartAddress, (void * codeAddr)) +{ + return dac_cast(GetRuntimeInstance()->FindMethodStartAddress(dac_cast(codeAddr))); +} + +PTR_UInt8 RuntimeInstance::FindMethodStartAddress(PTR_VOID ControlPC) +{ + ICodeManager * pCodeManager = FindCodeManagerByAddress(ControlPC); + MethodInfo methodInfo; + if (pCodeManager != NULL && pCodeManager->FindMethodInfo(ControlPC, &methodInfo)) + { + return (PTR_UInt8)pCodeManager->GetMethodStartAddress(&methodInfo); + } + + return NULL; +} + +ICodeManager * RuntimeInstance::FindCodeManagerByAddress(PTR_VOID pvAddress) +{ + ReaderWriterLock::ReadHolder read(&m_ModuleListLock); + + // TODO: ICodeManager support in DAC +#ifndef DACCESS_COMPILE + for (CodeManagerEntry * pEntry = m_CodeManagerList.GetHead(); pEntry != NULL; pEntry = pEntry->m_pNext) + { + if (dac_cast(pvAddress) - dac_cast(pEntry->m_pvStartRange) < pEntry->m_cbRange) + return pEntry->m_pCodeManager; + } +#endif + + return NULL; +} + +#ifndef DACCESS_COMPILE + +// Find the code manager containing the given address, which might be a return address from a managed function. The +// address may be to another managed function, or it may be to an unmanaged function. The address may also refer to +// an EEType. +ICodeManager * RuntimeInstance::FindCodeManagerForClasslibFunction(PTR_VOID address) +{ + // Try looking up the code manager assuming the address is for code first. This is expected to be most common. + ICodeManager * pCodeManager = FindCodeManagerByAddress(address); + if (pCodeManager != NULL) + return pCodeManager; + + ASSERT_MSG(!Thread::IsHijackTarget(address), "not expected to be called with hijacked return address"); + + return NULL; +} + +void * RuntimeInstance::GetClasslibFunctionFromCodeAddress(PTR_VOID address, ClasslibFunctionId functionId) +{ + // Find the code manager for the given address, which is an address into some managed module. It could + // be code, or it could be an EEType. No matter what, it's an address into a managed module in some non-Rtm + // type system. + ICodeManager * pCodeManager = FindCodeManagerForClasslibFunction(address); + + // If the address isn't in a managed module then we have no classlib function. 
+ if (pCodeManager == NULL) + { + return NULL; + } + + return pCodeManager->GetClasslibFunction(functionId); +} + +#endif // DACCESS_COMPILE + +PTR_UInt8 RuntimeInstance::GetTargetOfUnboxingAndInstantiatingStub(PTR_VOID ControlPC) +{ + ICodeManager * pCodeManager = FindCodeManagerByAddress(ControlPC); + if (pCodeManager != NULL) + { + PTR_UInt8 pData = (PTR_UInt8)pCodeManager->GetAssociatedData(ControlPC); + if (pData != NULL) + { + UInt8 flags = *pData++; + + if ((flags & (UInt8)AssociatedDataFlags::HasUnboxingStubTarget) != 0) + return pData + *dac_cast(pData); + } + } + + return NULL; +} + +GPTR_IMPL_INIT(RuntimeInstance, g_pTheRuntimeInstance, NULL); + +PTR_RuntimeInstance GetRuntimeInstance() +{ + return g_pTheRuntimeInstance; +} + +void RuntimeInstance::EnumAllStaticGCRefs(void * pfnCallback, void * pvCallbackData) +{ + for (TypeManagerList::Iterator iter = m_TypeManagerList.Begin(); iter != m_TypeManagerList.End(); iter++) + { + iter->m_pTypeManager->EnumStaticGCRefs(pfnCallback, pvCallbackData); + } +} + +void RuntimeInstance::SetLoopHijackFlags(UInt32 flag) +{ + for (TypeManagerList::Iterator iter = m_TypeManagerList.Begin(); iter != m_TypeManagerList.End(); iter++) + { + iter->m_pTypeManager->SetLoopHijackFlag(flag); + } +} + +RuntimeInstance::OsModuleList* RuntimeInstance::GetOsModuleList() +{ + return dac_cast(dac_cast(this) + offsetof(RuntimeInstance, m_OsModuleList)); +} + +ReaderWriterLock& RuntimeInstance::GetTypeManagerLock() +{ + return m_ModuleListLock; +} + +#ifndef DACCESS_COMPILE + +RuntimeInstance::RuntimeInstance() : + m_pThreadStore(NULL), + m_conservativeStackReportingEnabled(false), + m_pUnboxingStubsRegion(NULL) +{ +} + +RuntimeInstance::~RuntimeInstance() +{ + if (NULL != m_pThreadStore) + { + delete m_pThreadStore; + m_pThreadStore = NULL; + } +} + +HANDLE RuntimeInstance::GetPalInstance() +{ + return m_hPalInstance; +} + +void RuntimeInstance::EnableConservativeStackReporting() +{ + m_conservativeStackReportingEnabled = true; +} + +bool RuntimeInstance::RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, UInt32 cbRange) +{ + CodeManagerEntry * pEntry = new (nothrow) CodeManagerEntry(); + if (NULL == pEntry) + return false; + + pEntry->m_pvStartRange = pvStartRange; + pEntry->m_cbRange = cbRange; + pEntry->m_pCodeManager = pCodeManager; + + { + ReaderWriterLock::WriteHolder write(&m_ModuleListLock); + + m_CodeManagerList.PushHead(pEntry); + } + + return true; +} + +void RuntimeInstance::UnregisterCodeManager(ICodeManager * pCodeManager) +{ + CodeManagerEntry * pEntry = NULL; + + { + ReaderWriterLock::WriteHolder write(&m_ModuleListLock); + + for (CodeManagerList::Iterator i = m_CodeManagerList.Begin(), end = m_CodeManagerList.End(); i != end; i++) + { + if (i->m_pCodeManager == pCodeManager) + { + pEntry = *i; + + m_CodeManagerList.Remove(i); + break; + } + } + } + + ASSERT(pEntry != NULL); + delete pEntry; +} + +extern "C" bool __stdcall RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, UInt32 cbRange) +{ + return GetRuntimeInstance()->RegisterCodeManager(pCodeManager, pvStartRange, cbRange); +} + +extern "C" void __stdcall UnregisterCodeManager(ICodeManager * pCodeManager) +{ + return GetRuntimeInstance()->UnregisterCodeManager(pCodeManager); +} + +bool RuntimeInstance::RegisterUnboxingStubs(PTR_VOID pvStartRange, UInt32 cbRange) +{ + ASSERT(pvStartRange != NULL && cbRange > 0); + + UnboxingStubsRegion * pEntry = new (nothrow) UnboxingStubsRegion(); + if (NULL == pEntry) + return false; + + pEntry->m_pRegionStart = 
pvStartRange; + pEntry->m_cbRegion = cbRange; + + do + { + pEntry->m_pNextRegion = m_pUnboxingStubsRegion; + } + while (PalInterlockedCompareExchangePointer((void *volatile *)&m_pUnboxingStubsRegion, pEntry, pEntry->m_pNextRegion) != pEntry->m_pNextRegion); + + return true; +} + +bool RuntimeInstance::IsUnboxingStub(UInt8* pCode) +{ + UnboxingStubsRegion * pCurrent = m_pUnboxingStubsRegion; + while (pCurrent != NULL) + { + UInt8* pUnboxingStubsRegion = dac_cast(pCurrent->m_pRegionStart); + if (pCode >= pUnboxingStubsRegion && pCode < (pUnboxingStubsRegion + pCurrent->m_cbRegion)) + return true; + + pCurrent = pCurrent->m_pNextRegion; + } + + return false; +} + +extern "C" bool __stdcall RegisterUnboxingStubs(PTR_VOID pvStartRange, UInt32 cbRange) +{ + return GetRuntimeInstance()->RegisterUnboxingStubs(pvStartRange, cbRange); +} + +bool RuntimeInstance::RegisterTypeManager(TypeManager * pTypeManager) +{ + TypeManagerEntry * pEntry = new (nothrow) TypeManagerEntry(); + if (NULL == pEntry) + return false; + + pEntry->m_pTypeManager = pTypeManager; + + { + ReaderWriterLock::WriteHolder write(&m_ModuleListLock); + + m_TypeManagerList.PushHead(pEntry); + } + + return true; +} + +COOP_PINVOKE_HELPER(TypeManagerHandle, RhpCreateTypeManager, (HANDLE osModule, void* pModuleHeader, PTR_PTR_VOID pClasslibFunctions, UInt32 nClasslibFunctions)) +{ + TypeManager * typeManager = TypeManager::Create(osModule, pModuleHeader, pClasslibFunctions, nClasslibFunctions); + GetRuntimeInstance()->RegisterTypeManager(typeManager); + + // This event must occur after the module is added to the enumeration + if (osModule != nullptr) + DebugEventSource::SendModuleLoadEvent(osModule); + + return TypeManagerHandle::Create(typeManager); +} + +COOP_PINVOKE_HELPER(HANDLE, RhGetOSModuleForMrt, ()) +{ + return GetRuntimeInstance()->GetPalInstance(); +} + +COOP_PINVOKE_HELPER(void*, RhpRegisterOsModule, (HANDLE hOsModule)) +{ + RuntimeInstance::OsModuleEntry * pEntry = new (nothrow) RuntimeInstance::OsModuleEntry(); + if (NULL == pEntry) + return nullptr; // Return null on failure. 
+ + pEntry->m_osModule = hOsModule; + + { + RuntimeInstance *pRuntimeInstance = GetRuntimeInstance(); + ReaderWriterLock::WriteHolder write(&pRuntimeInstance->GetTypeManagerLock()); + + pRuntimeInstance->GetOsModuleList()->PushHead(pEntry); + } + + return hOsModule; // Return non-null on success +} + +RuntimeInstance::TypeManagerList& RuntimeInstance::GetTypeManagerList() +{ + return m_TypeManagerList; +} + +// static +bool RuntimeInstance::Initialize(HANDLE hPalInstance) +{ + NewHolder pRuntimeInstance = new (nothrow) RuntimeInstance(); + if (NULL == pRuntimeInstance) + return false; + + CreateHolder pThreadStore = ThreadStore::Create(pRuntimeInstance); + if (NULL == pThreadStore) + return false; + + pThreadStore.SuppressRelease(); + pRuntimeInstance.SuppressRelease(); + + pRuntimeInstance->m_pThreadStore = pThreadStore; + pRuntimeInstance->m_hPalInstance = hPalInstance; + + ASSERT_MSG(g_pTheRuntimeInstance == NULL, "multi-instances are not supported"); + g_pTheRuntimeInstance = pRuntimeInstance; + + return true; +} + +void RuntimeInstance::Destroy() +{ + delete this; +} + +bool RuntimeInstance::ShouldHijackLoopForGcStress(UIntNative CallsiteIP) +{ +#ifdef FEATURE_GC_STRESS + return ShouldHijackForGcStress(CallsiteIP, htLoop); +#else // FEATURE_GC_STRESS + UNREFERENCED_PARAMETER(CallsiteIP); + return false; +#endif // FEATURE_GC_STRESS +} + +bool RuntimeInstance::ShouldHijackCallsiteForGcStress(UIntNative CallsiteIP) +{ +#ifdef FEATURE_GC_STRESS + return ShouldHijackForGcStress(CallsiteIP, htCallsite); +#else // FEATURE_GC_STRESS + UNREFERENCED_PARAMETER(CallsiteIP); + return false; +#endif // FEATURE_GC_STRESS +} + +COOP_PINVOKE_HELPER(UInt32, RhGetGCDescSize, (EEType* pEEType)) +{ + return RedhawkGCInterface::GetGCDescSize(pEEType); +} + + +// Keep in sync with ndp\fxcore\src\System.Private.CoreLib\system\runtime\runtimeimports.cs +enum RuntimeHelperKind +{ + AllocateObject, + IsInst, + CastClass, + AllocateArray, + CheckArrayElementType, +}; + +// The dictionary codegen expects a pointer that points at a memory location that points to the method pointer +// Create indirections for all helpers used below + +#define DECLARE_INDIRECTION(RET_TYPE, HELPER_NAME, ARGS) \ + EXTERN_C RET_TYPE HELPER_NAME ARGS; \ + const PTR_VOID indirection_##HELPER_NAME = (PTR_VOID)&HELPER_NAME + +#define INDIRECTION(HELPER_NAME) ((PTR_VOID)&indirection_##HELPER_NAME) + +DECLARE_INDIRECTION(Object *, RhpNewFast, (EEType *)); +DECLARE_INDIRECTION(Object *, RhpNewFinalizable, (EEType *)); + +DECLARE_INDIRECTION(Array *, RhpNewArray, (EEType *, int)); + +DECLARE_INDIRECTION(Object *, RhTypeCast_IsInstanceOf, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_CheckCast, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_IsInstanceOfClass, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_CheckCastClass, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_IsInstanceOfArray, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_CheckCastArray, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_IsInstanceOfInterface, (EEType *, Object *)); +DECLARE_INDIRECTION(Object *, RhTypeCast_CheckCastInterface, (EEType *, Object *)); + +DECLARE_INDIRECTION(void, RhTypeCast_CheckVectorElemAddr, (EEType *, Object *)); + +#ifdef HOST_ARM +DECLARE_INDIRECTION(Object *, RhpNewFinalizableAlign8, (EEType *)); +DECLARE_INDIRECTION(Object *, RhpNewFastMisalign, (EEType *)); +DECLARE_INDIRECTION(Object *, RhpNewFastAlign8, (EEType *)); + 
+DECLARE_INDIRECTION(Array *, RhpNewArrayAlign8, (EEType *, int)); +#endif + +COOP_PINVOKE_HELPER(PTR_VOID, RhGetRuntimeHelperForType, (EEType * pEEType, int helperKind)) +{ + // This implementation matches what the binder does (MetaDataEngine::*() in rh\src\tools\rhbind\MetaDataEngine.cpp) + // If you change the binder's behavior, change this implementation too + + switch (helperKind) + { + case RuntimeHelperKind::AllocateObject: +#ifdef HOST_ARM + if ((pEEType->get_RareFlags() & EEType::RareFlags::RequiresAlign8Flag) == EEType::RareFlags::RequiresAlign8Flag) + { + if (pEEType->HasFinalizer()) + return INDIRECTION(RhpNewFinalizableAlign8); + else if (pEEType->get_IsValueType()) // returns true for enum types as well + return INDIRECTION(RhpNewFastMisalign); + else + return INDIRECTION(RhpNewFastAlign8); + } +#endif + if (pEEType->HasFinalizer()) + return INDIRECTION(RhpNewFinalizable); + else + return INDIRECTION(RhpNewFast); + + case RuntimeHelperKind::IsInst: + if (pEEType->IsArray()) + return INDIRECTION(RhTypeCast_IsInstanceOfArray); + else if (pEEType->IsInterface()) + return INDIRECTION(RhTypeCast_IsInstanceOfInterface); + else if (pEEType->IsParameterizedType()) + return INDIRECTION(RhTypeCast_IsInstanceOf); // Array handled above; pointers and byrefs handled here + else + return INDIRECTION(RhTypeCast_IsInstanceOfClass); + + case RuntimeHelperKind::CastClass: + if (pEEType->IsArray()) + return INDIRECTION(RhTypeCast_CheckCastArray); + else if (pEEType->IsInterface()) + return INDIRECTION(RhTypeCast_CheckCastInterface); + else if (pEEType->IsParameterizedType()) + return INDIRECTION(RhTypeCast_CheckCast); // Array handled above; pointers and byrefs handled here + else + return INDIRECTION(RhTypeCast_CheckCastClass); + + case RuntimeHelperKind::AllocateArray: +#ifdef HOST_ARM + if (pEEType->RequiresAlign8()) + return INDIRECTION(RhpNewArrayAlign8); +#endif + return INDIRECTION(RhpNewArray); + + case RuntimeHelperKind::CheckArrayElementType: + return INDIRECTION(RhTypeCast_CheckVectorElemAddr); + + default: + UNREACHABLE(); + } +} + +#undef DECLARE_INDIRECTION +#undef INDIRECTION + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +EXTERN_C void RhpInitialDynamicInterfaceDispatch(); + +COOP_PINVOKE_HELPER(void *, RhNewInterfaceDispatchCell, (EEType * pInterface, Int32 slotNumber)) +{ + InterfaceDispatchCell * pCell = new (nothrow) InterfaceDispatchCell[2]; + if (pCell == NULL) + return NULL; + + // Due to the synchronization mechanism used to update this indirection cell we must ensure the cell's alignment is twice that of a pointer. + // Fortunately, Windows heap guarantees this alignment. 
+ ASSERT(IS_ALIGNED(pCell, 2 * POINTER_SIZE)); + ASSERT(IS_ALIGNED(pInterface, (InterfaceDispatchCell::IDC_CachePointerMask + 1))); + + pCell[0].m_pStub = (UIntNative)&RhpInitialDynamicInterfaceDispatch; + pCell[0].m_pCache = ((UIntNative)pInterface) | InterfaceDispatchCell::IDC_CachePointerIsInterfacePointerOrMetadataToken; + pCell[1].m_pStub = 0; + pCell[1].m_pCache = (UIntNative)slotNumber; + + return pCell; +} +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +COOP_PINVOKE_HELPER(PTR_UInt8, RhGetThreadLocalStorageForDynamicType, (UInt32 uOffset, UInt32 tlsStorageSize, UInt32 numTlsCells)) +{ + Thread * pCurrentThread = ThreadStore::GetCurrentThread(); + + PTR_UInt8 pResult = pCurrentThread->GetThreadLocalStorageForDynamicType(uOffset); + if (pResult != NULL || tlsStorageSize == 0 || numTlsCells == 0) + return pResult; + + ASSERT(tlsStorageSize > 0 && numTlsCells > 0); + return pCurrentThread->AllocateThreadLocalStorageForDynamicType(uOffset, tlsStorageSize, numTlsCells); +} + +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.h b/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.h new file mode 100644 index 0000000000000..8a89b04858c2c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/RuntimeInstance.h @@ -0,0 +1,127 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __RuntimeInstance_h__ +#define __RuntimeInstance_h__ + +class ThreadStore; +typedef DPTR(ThreadStore) PTR_ThreadStore; +class ICodeManager; +struct StaticGcDesc; +typedef SPTR(StaticGcDesc) PTR_StaticGcDesc; +class TypeManager; +enum GenericVarianceType : UInt8; + +#include "ICodeManager.h" + +class RuntimeInstance +{ + friend class AsmOffsets; + friend struct DefaultSListTraits; + friend class Thread; + + PTR_ThreadStore m_pThreadStore; + HANDLE m_hPalInstance; // this is the HANDLE passed into DllMain + ReaderWriterLock m_ModuleListLock; + +public: + struct OsModuleEntry; + typedef DPTR(OsModuleEntry) PTR_OsModuleEntry; + struct OsModuleEntry + { + PTR_OsModuleEntry m_pNext; + HANDLE m_osModule; + }; + + typedef SList OsModuleList; +private: + OsModuleList m_OsModuleList; + + struct CodeManagerEntry; + typedef DPTR(CodeManagerEntry) PTR_CodeManagerEntry; + + struct CodeManagerEntry + { + PTR_CodeManagerEntry m_pNext; + PTR_VOID m_pvStartRange; + UInt32 m_cbRange; + ICodeManager * m_pCodeManager; + }; + + typedef SList CodeManagerList; + CodeManagerList m_CodeManagerList; + +public: + struct TypeManagerEntry + { + TypeManagerEntry* m_pNext; + TypeManager* m_pTypeManager; + }; + + typedef SList TypeManagerList; + +private: + TypeManagerList m_TypeManagerList; + + bool m_conservativeStackReportingEnabled; + + struct UnboxingStubsRegion + { + PTR_VOID m_pRegionStart; + UInt32 m_cbRegion; + UnboxingStubsRegion* m_pNextRegion; + + UnboxingStubsRegion() : m_pRegionStart(0), m_cbRegion(0), m_pNextRegion(NULL) { } + }; + + UnboxingStubsRegion* m_pUnboxingStubsRegion; + + RuntimeInstance(); + + SList* GetModuleList(); + + SList* GetModuleManagerList(); + + bool BuildGenericTypeHashTable(); + + ICodeManager * FindCodeManagerForClasslibFunction(PTR_VOID address); + +public: + ~RuntimeInstance(); + ThreadStore * GetThreadStore(); + HANDLE GetPalInstance(); + + PTR_UInt8 FindMethodStartAddress(PTR_VOID ControlPC); + PTR_UInt8 GetTargetOfUnboxingAndInstantiatingStub(PTR_VOID ControlPC); + void EnableConservativeStackReporting(); + bool IsConservativeStackReportingEnabled() { return 
m_conservativeStackReportingEnabled; } + + bool RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, UInt32 cbRange); + void UnregisterCodeManager(ICodeManager * pCodeManager); + + ICodeManager * FindCodeManagerByAddress(PTR_VOID ControlPC); + PTR_VOID GetClasslibFunctionFromCodeAddress(PTR_VOID address, ClasslibFunctionId functionId); + + bool RegisterTypeManager(TypeManager * pTypeManager); + TypeManagerList& GetTypeManagerList(); + OsModuleList* GetOsModuleList(); + ReaderWriterLock& GetTypeManagerLock(); + + bool RegisterUnboxingStubs(PTR_VOID pvStartRange, UInt32 cbRange); + bool IsUnboxingStub(UInt8* pCode); + + static bool Initialize(HANDLE hPalInstance); + void Destroy(); + + void EnumAllStaticGCRefs(void * pfnCallback, void * pvCallbackData); + + bool ShouldHijackCallsiteForGcStress(UIntNative CallsiteIP); + bool ShouldHijackLoopForGcStress(UIntNative CallsiteIP); + void SetLoopHijackFlags(UInt32 flag); +}; +typedef DPTR(RuntimeInstance) PTR_RuntimeInstance; + + +PTR_RuntimeInstance GetRuntimeInstance(); + +#endif // __RuntimeInstance_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/SpinLock.h b/src/coreclr/src/nativeaot/Runtime/SpinLock.h new file mode 100644 index 0000000000000..61c648c935a7f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/SpinLock.h @@ -0,0 +1,71 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#ifndef __SPINLOCK_H__ +#define __SPINLOCK_H__ + +// #SwitchToThreadSpinning +// +// If you call __SwitchToThread in a loop waiting for a condition to be met, +// it is critical that you insert periodic sleeps. This is because the thread +// you are waiting for to set that condition may need your CPU, and simply +// calling __SwitchToThread(0) will NOT guarantee that it gets a chance to run. +// If there are other runnable threads of higher priority, or even if there +// aren't and it is in another processor's queue, you will be spinning a very +// long time. +// +// To force all callers to consider this issue and to avoid each having to +// duplicate the same backoff code, __SwitchToThread takes a required second +// parameter. If you want it to handle backoff for you, this parameter should +// be the number of successive calls you have made to __SwitchToThread (a loop +// count). If you want to take care of backing off yourself, you can pass +// CALLER_LIMITS_SPINNING. There are three valid cases for doing this: +// +// - You count iterations and induce a sleep periodically +// - The number of consecutive __SwitchToThreads is limited +// - Your call to __SwitchToThread includes a non-zero sleep duration +// +// Lastly, to simplify this requirement for the following common coding pattern: +// +// while (!condition) +// SwitchToThread +// +// you can use the YIELD_WHILE macro. 
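
To make the contract above concrete: the YIELD_WHILE macro defined just below expands, for the SpinLock::Lock condition that appears later in this header, to roughly the following (expansion written out for illustration only).

    {
        UInt32 __dwSwitchCount = 0;
        while (PalInterlockedExchange(&lock.m_lock, LOCKED) == LOCKED)
        {
            // Each retry passes the running iteration count so that __SwitchToThread
            // can insert the periodic sleeps described above rather than spinning
            // forever on __SwitchToThread(0).
            __SwitchToThread(0, ++__dwSwitchCount);
        }
    }

Callers that prefer to manage their own backoff pass CALLER_LIMITS_SPINNING instead, as outlined in the comment above.
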
+ +#define YIELD_WHILE(condition) \ + { \ + UInt32 __dwSwitchCount = 0; \ + while (condition) \ + { \ + __SwitchToThread(0, ++__dwSwitchCount); \ + } \ + } + +class SpinLock +{ +private: + enum LOCK_STATE + { + UNLOCKED = 0, + LOCKED = 1 + }; + + volatile Int32 m_lock; + + static void Lock(SpinLock& lock) + { YIELD_WHILE (PalInterlockedExchange(&lock.m_lock, LOCKED) == LOCKED); } + + static void Unlock(SpinLock& lock) + { PalInterlockedExchange(&lock.m_lock, UNLOCKED); } + +public: + SpinLock() + : m_lock(UNLOCKED) { } + + typedef HolderNoDefaultValue + Holder; +}; + +#endif + diff --git a/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.cpp new file mode 100644 index 0000000000000..9b6a9119a9f5c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.cpp @@ -0,0 +1,1914 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "RedhawkWarnings.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "RWLock.h" +#include "event.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" +#include "stressLog.h" + +#include "shash.h" +#include "RuntimeInstance.h" +#include "rhbinder.h" + +#include "DebugFuncEval.h" + +// warning C4061: enumerator '{blah}' in switch of enum '{blarg}' is not explicitly handled by a case label +#pragma warning(disable:4061) + +#if !defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: these are (currently) only implemented in assembly helpers + +#if defined(FEATURE_DYNAMIC_CODE) +EXTERN_C void * RhpUniversalTransition(); +GPTR_IMPL_INIT(PTR_VOID, g_RhpUniversalTransitionAddr, (void**)&RhpUniversalTransition); + +EXTERN_C PTR_VOID PointerToReturnFromUniversalTransition; +GVAL_IMPL_INIT(PTR_VOID, g_ReturnFromUniversalTransitionAddr, PointerToReturnFromUniversalTransition); + +EXTERN_C PTR_VOID PointerToReturnFromUniversalTransition_DebugStepTailCall; +GVAL_IMPL_INIT(PTR_VOID, g_ReturnFromUniversalTransition_DebugStepTailCallAddr, PointerToReturnFromUniversalTransition_DebugStepTailCall); + +EXTERN_C PTR_VOID PointerToReturnFromCallDescrThunk; +GVAL_IMPL_INIT(PTR_VOID, g_ReturnFromCallDescrThunkAddr, PointerToReturnFromCallDescrThunk); +#endif + +#ifdef TARGET_X86 +EXTERN_C void * PointerToRhpCallFunclet2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpCallFunclet2Addr, PointerToRhpCallFunclet2); +#endif +EXTERN_C void * PointerToRhpCallCatchFunclet2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpCallCatchFunclet2Addr, PointerToRhpCallCatchFunclet2); +EXTERN_C void * PointerToRhpCallFinallyFunclet2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpCallFinallyFunclet2Addr, PointerToRhpCallFinallyFunclet2); +EXTERN_C void * PointerToRhpCallFilterFunclet2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpCallFilterFunclet2Addr, PointerToRhpCallFilterFunclet2); +EXTERN_C void * PointerToRhpThrowEx2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpThrowEx2Addr, PointerToRhpThrowEx2); +EXTERN_C void * PointerToRhpThrowHwEx2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpThrowHwEx2Addr, PointerToRhpThrowHwEx2); +EXTERN_C void * PointerToRhpRethrow2; +GVAL_IMPL_INIT(PTR_VOID, g_RhpRethrow2Addr, PointerToRhpRethrow2); +#endif // !defined(USE_PORTABLE_HELPERS) + +// 
Addresses of functions in the DAC won't match their runtime counterparts so we +// assign them to globals. However it is more performant in the runtime to compare +// against immediates than to fetch the global. This macro hides the difference. +// +// We use a special code path for the return address from thunks as +// having the return address public confuses today DIA stackwalker. Before we can +// ingest the updated DIA, we're instead exposing a global void * variable +// holding the return address. +#ifdef DACCESS_COMPILE +#define EQUALS_RETURN_ADDRESS(x, func_name) ((x) == g_ ## func_name ## Addr) +#else +#define EQUALS_RETURN_ADDRESS(x, func_name) (((x)) == (PointerTo ## func_name)) +#endif + +#ifdef DACCESS_COMPILE +#define FAILFAST_OR_DAC_FAIL(x) if(!(x)) { DacError(E_FAIL); } +#define FAILFAST_OR_DAC_FAIL_MSG(x, msg) if(!(x)) { DacError(E_FAIL); } +#define FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY(msg) DacError(E_FAIL) +#else +#define FAILFAST_OR_DAC_FAIL(x) if(!(x)) { ASSERT_UNCONDITIONALLY(#x); RhFailFast(); } +#define FAILFAST_OR_DAC_FAIL_MSG(x, msg) if(!(x)) { ASSERT_MSG((x), msg); ASSERT_UNCONDITIONALLY(#x); RhFailFast(); } +#define FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY(msg) { ASSERT_UNCONDITIONALLY(msg); RhFailFast(); } +#endif + +PTR_PInvokeTransitionFrame GetPInvokeTransitionFrame(PTR_VOID pTransitionFrame) +{ + return static_cast(pTransitionFrame); +} + +StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PTR_VOID pInitialTransitionFrame) +{ + STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n"); + ASSERT(!pThreadToWalk->DangerousCrossThreadIsHijacked()); + InternalInit(pThreadToWalk, GetPInvokeTransitionFrame(pInitialTransitionFrame), GcStackWalkFlags); + PrepareToYieldFrame(); +} + +StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx) +{ + STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ hijack ]\n"); + InternalInit(pThreadToWalk, pCtx, 0); + PrepareToYieldFrame(); +} + +void StackFrameIterator::ResetNextExInfoForSP(UIntNative SP) +{ + while (m_pNextExInfo && (SP > (UIntNative)dac_cast(m_pNextExInfo))) + m_pNextExInfo = m_pNextExInfo->m_pPrevExInfo; +} + +void StackFrameIterator::EnterInitialInvalidState(Thread * pThreadToWalk) +{ + m_pThread = pThreadToWalk; + m_pInstance = GetRuntimeInstance(); + m_pCodeManager = NULL; + m_pHijackedReturnValue = NULL; + m_HijackedReturnValueKind = GCRK_Unknown; + m_pConservativeStackRangeLowerBound = NULL; + m_pConservativeStackRangeUpperBound = NULL; + m_ShouldSkipRegularGcReporting = false; + m_pendingFuncletFramePointer = NULL; + m_pNextExInfo = pThreadToWalk->GetCurExInfo(); + SetControlPC(0); +} + +// Prepare to start a stack walk from the context listed in the supplied PInvokeTransitionFrame. +// The supplied frame can be TOP_OF_STACK_MARKER to indicate that there are no more managed +// frames on the stack. Otherwise, the context in the frame always describes a callsite +// where control transitioned from managed to unmanaged code. +// NOTE: When a return address hijack is executed, the PC in the generated PInvokeTransitionFrame +// matches the hijacked return address. This PC is not guaranteed to be in managed code +// since the hijacked return address may refer to a location where an assembly thunk called +// into managed code. +// NOTE: When the PC is in an assembly thunk, this function will unwind to the next managed +// frame and may publish a conservative stack range (if and only if any of the unwound +// thunks report a conservative range). 
+void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PInvokeTransitionFrame pFrame, UInt32 dwFlags) +{ + // EH stackwalks are always required to unwind non-volatile floating point state. This + // state is never carried by PInvokeTransitionFrames, implying that they can never be used + // as the initial state for an EH stackwalk. + ASSERT_MSG(!(dwFlags & ApplyReturnAddressAdjustment), + "PInvokeTransitionFrame content is not sufficient to seed an EH stackwalk"); + + EnterInitialInvalidState(pThreadToWalk); + + if (pFrame == TOP_OF_STACK_MARKER) + { + // There are no managed frames on the stack. Leave the iterator in its initial invalid state. + return; + } + + m_dwFlags = dwFlags; + + // We need to walk the ExInfo chain in parallel with the stackwalk so that we know when we cross over + // exception throw points. So we must find our initial point in the ExInfo chain here so that we can + // properly walk it in parallel. + ResetNextExInfoForSP((UIntNative)dac_cast(pFrame)); + +#if !defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: no portable version of regdisplay + memset(&m_RegDisplay, 0, sizeof(m_RegDisplay)); + m_RegDisplay.SetIP((PCODE)pFrame->m_RIP); + m_RegDisplay.SetAddrOfIP((PTR_PCODE)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_RIP)); + SetControlPC(dac_cast(*(m_RegDisplay.pIP))); + + PTR_UIntNative pPreservedRegsCursor = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_PreservedRegs); + +#ifdef TARGET_ARM + m_RegDisplay.pLR = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_RIP); + m_RegDisplay.pR11 = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_ChainPointer); + + if (pFrame->m_Flags & PTFF_SAVE_R4) { m_RegDisplay.pR4 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R5) { m_RegDisplay.pR5 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R6) { m_RegDisplay.pR6 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R7) { m_RegDisplay.pR7 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R8) { m_RegDisplay.pR8 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R9) { m_RegDisplay.pR9 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R10) { m_RegDisplay.pR10 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_SP) { m_RegDisplay.SP = *pPreservedRegsCursor++; } + m_RegDisplay.pR11 = (PTR_UIntNative) PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_FramePointer); + if (pFrame->m_Flags & PTFF_SAVE_R0) { m_RegDisplay.pR0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R1) { m_RegDisplay.pR1 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R2) { m_RegDisplay.pR2 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R3) { m_RegDisplay.pR3 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_LR) { m_RegDisplay.pLR = pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_R0_IS_GCREF) + { + m_pHijackedReturnValue = (PTR_RtuObjectRef) m_RegDisplay.pR0; + m_HijackedReturnValueKind = GCRK_Object; + } + if (pFrame->m_Flags & PTFF_R0_IS_BYREF) + { + m_pHijackedReturnValue = (PTR_RtuObjectRef) m_RegDisplay.pR0; + m_HijackedReturnValueKind = GCRK_Byref; + } + +#elif defined(TARGET_ARM64) + m_RegDisplay.pFP = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_FramePointer); + m_RegDisplay.pLR = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_RIP); + + ASSERT(!(pFrame->m_Flags & PTFF_SAVE_FP)); // FP should never contain a GC ref because we require + // a frame pointer for 
methods with pinvokes + + if (pFrame->m_Flags & PTFF_SAVE_X19) { m_RegDisplay.pX19 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X20) { m_RegDisplay.pX20 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X21) { m_RegDisplay.pX21 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X22) { m_RegDisplay.pX22 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X23) { m_RegDisplay.pX23 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X24) { m_RegDisplay.pX24 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X25) { m_RegDisplay.pX25 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X26) { m_RegDisplay.pX26 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X27) { m_RegDisplay.pX27 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X28) { m_RegDisplay.pX28 = pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_SP) { m_RegDisplay.SP = *pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_X0) { m_RegDisplay.pX0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X1) { m_RegDisplay.pX1 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X2) { m_RegDisplay.pX2 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X3) { m_RegDisplay.pX3 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X4) { m_RegDisplay.pX4 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X5) { m_RegDisplay.pX5 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X6) { m_RegDisplay.pX6 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X7) { m_RegDisplay.pX7 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X8) { m_RegDisplay.pX8 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X9) { m_RegDisplay.pX9 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X10) { m_RegDisplay.pX10 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X11) { m_RegDisplay.pX11 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X12) { m_RegDisplay.pX12 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X13) { m_RegDisplay.pX13 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X14) { m_RegDisplay.pX14 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X15) { m_RegDisplay.pX15 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X16) { m_RegDisplay.pX16 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X17) { m_RegDisplay.pX17 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X18) { m_RegDisplay.pX18 = pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_LR) { m_RegDisplay.pLR = pPreservedRegsCursor++; } + + GCRefKind retValueKind = TransitionFrameFlagsToReturnKind(pFrame->m_Flags); + if (retValueKind != GCRK_Scalar) + { + m_pHijackedReturnValue = (PTR_RtuObjectRef)m_RegDisplay.pX0; + m_HijackedReturnValueKind = retValueKind; + } + +#else // TARGET_ARM + if (pFrame->m_Flags & PTFF_SAVE_RBX) { m_RegDisplay.pRbx = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_RSI) { m_RegDisplay.pRsi = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_RDI) { m_RegDisplay.pRdi = pPreservedRegsCursor++; } + ASSERT(!(pFrame->m_Flags & PTFF_SAVE_RBP)); // RBP should never contain a GC ref because we require + // a frame pointer for methods with pinvokes +#ifdef TARGET_AMD64 + if (pFrame->m_Flags & PTFF_SAVE_R12) { m_RegDisplay.pR12 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R13) { m_RegDisplay.pR13 = 
pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R14) { m_RegDisplay.pR14 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R15) { m_RegDisplay.pR15 = pPreservedRegsCursor++; } +#endif // TARGET_AMD64 + + m_RegDisplay.pRbp = (PTR_UIntNative) PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_FramePointer); + + if (pFrame->m_Flags & PTFF_SAVE_RSP) { m_RegDisplay.SP = *pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_RAX) { m_RegDisplay.pRax = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_RCX) { m_RegDisplay.pRcx = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_RDX) { m_RegDisplay.pRdx = pPreservedRegsCursor++; } +#ifdef TARGET_AMD64 + if (pFrame->m_Flags & PTFF_SAVE_R8 ) { m_RegDisplay.pR8 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R9 ) { m_RegDisplay.pR9 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R10) { m_RegDisplay.pR10 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R11) { m_RegDisplay.pR11 = pPreservedRegsCursor++; } +#endif // TARGET_AMD64 + + if (pFrame->m_Flags & PTFF_RAX_IS_GCREF) + { + m_pHijackedReturnValue = (PTR_RtuObjectRef) m_RegDisplay.pRax; + m_HijackedReturnValueKind = GCRK_Object; + } + if (pFrame->m_Flags & PTFF_RAX_IS_BYREF) + { + m_pHijackedReturnValue = (PTR_RtuObjectRef) m_RegDisplay.pRax; + m_HijackedReturnValueKind = GCRK_Byref; + } + +#endif // TARGET_ARM + +#endif // defined(USE_PORTABLE_HELPERS) + + // @TODO: currently, we always save all registers -- how do we handle the onese we don't save once we + // start only saving those that weren't already saved? + + // This function guarantees that the final initialized context will refer to a managed + // frame. In the rare case where the PC does not refer to managed code (and refers to an + // assembly thunk instead), unwind through the thunk sequence to find the nearest managed + // frame. + // NOTE: When thunks are present, the thunk sequence may report a conservative GC reporting + // lower bound that must be applied when processing the managed frame. + + ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC); + + if (category == InManagedCode) + { + ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC)); + } + else if (IsNonEHThunk(category)) + { + UnwindNonEHThunkSequence(); + ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC)); + } + else + { + FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("PInvokeTransitionFrame PC points to an unexpected assembly thunk kind."); + } + + STRESS_LOG1(LF_STACKWALK, LL_INFO10000, " %p\n", m_ControlPC); +} + +#ifndef DACCESS_COMPILE + +void StackFrameIterator::InternalInitForEH(Thread * pThreadToWalk, PAL_LIMITED_CONTEXT * pCtx, bool instructionFault) +{ + STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ EH ]\n"); + InternalInit(pThreadToWalk, pCtx, EHStackWalkFlags); + + if (instructionFault) + { + // We treat the IP as a return-address and adjust backward when doing EH-related things. The faulting + // instruction IP here will be the start of the faulting instruction and so we have the right IP for + // EH-related things already. 
+ m_dwFlags &= ~ApplyReturnAddressAdjustment; + PrepareToYieldFrame(); + m_dwFlags |= ApplyReturnAddressAdjustment; + } + else + { + PrepareToYieldFrame(); + } + + STRESS_LOG1(LF_STACKWALK, LL_INFO10000, " %p\n", m_ControlPC); +} + +void StackFrameIterator::InternalInitForStackTrace() +{ + STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ StackTrace ]\n"); + Thread * pThreadToWalk = ThreadStore::GetCurrentThread(); + PTR_VOID pFrame = pThreadToWalk->GetTransitionFrameForStackTrace(); + InternalInit(pThreadToWalk, GetPInvokeTransitionFrame(pFrame), StackTraceStackWalkFlags); + PrepareToYieldFrame(); +} + +#endif //!DACCESS_COMPILE + +// Prepare to start a stack walk from the context listed in the supplied PAL_LIMITED_CONTEXT. +// The supplied context can describe a location in either managed or unmanaged code. In the +// latter case the iterator is left in an invalid state when this function returns. +void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx, UInt32 dwFlags) +{ + ASSERT((dwFlags & MethodStateCalculated) == 0); + + EnterInitialInvalidState(pThreadToWalk); + + m_dwFlags = dwFlags; + + // We need to walk the ExInfo chain in parallel with the stackwalk so that we know when we cross over + // exception throw points. So we must find our initial point in the ExInfo chain here so that we can + // properly walk it in parallel. + ResetNextExInfoForSP(pCtx->GetSp()); + + // This codepath is used by the hijack stackwalk and we can get arbitrary ControlPCs from there. If this + // context has a non-managed control PC, then we're done. + if (!m_pInstance->FindCodeManagerByAddress(dac_cast(pCtx->GetIp()))) + return; + + // + // control state + // + SetControlPC(dac_cast(pCtx->GetIp())); + m_RegDisplay.SP = pCtx->GetSp(); + m_RegDisplay.IP = pCtx->GetIp(); + m_RegDisplay.pIP = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, IP); + +#ifdef TARGET_ARM + // + // preserved regs + // + m_RegDisplay.pR4 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R4); + m_RegDisplay.pR5 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R5); + m_RegDisplay.pR6 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R6); + m_RegDisplay.pR7 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R7); + m_RegDisplay.pR8 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R8); + m_RegDisplay.pR9 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R9); + m_RegDisplay.pR10 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R10); + m_RegDisplay.pR11 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R11); + m_RegDisplay.pLR = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, LR); + + // + // preserved vfp regs + // + for (Int32 i = 0; i < 16 - 8; i++) + { + m_RegDisplay.D[i] = pCtx->D[i]; + } + // + // scratch regs + // + m_RegDisplay.pR0 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R0); + +#elif defined(TARGET_ARM64) + // + // preserved regs + // + m_RegDisplay.pX19 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X19); + m_RegDisplay.pX20 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X20); + m_RegDisplay.pX21 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X21); + m_RegDisplay.pX22 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X22); + m_RegDisplay.pX23 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X23); + m_RegDisplay.pX24 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X24); + m_RegDisplay.pX25 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X25); + m_RegDisplay.pX26 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X26); + m_RegDisplay.pX27 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X27); + m_RegDisplay.pX28 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X28); + m_RegDisplay.pFP = 
PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, FP); + m_RegDisplay.pLR = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, LR); + + // + // preserved vfp regs + // + for (Int32 i = 0; i < 16 - 8; i++) + { + m_RegDisplay.D[i] = pCtx->D[i]; + } + // + // scratch regs + // + m_RegDisplay.pX0 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X0); + m_RegDisplay.pX1 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, X1); + // TODO: Copy X2-X7 when we start supporting HVA's + +#elif defined(UNIX_AMD64_ABI) + // + // preserved regs + // + m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rbp); + m_RegDisplay.pRbx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rbx); + m_RegDisplay.pR12 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R12); + m_RegDisplay.pR13 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R13); + m_RegDisplay.pR14 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R14); + m_RegDisplay.pR15 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R15); + + // + // scratch regs + // + m_RegDisplay.pRax = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rax); + m_RegDisplay.pRcx = NULL; + m_RegDisplay.pRdx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rdx); + m_RegDisplay.pRsi = NULL; + m_RegDisplay.pRdi = NULL; + m_RegDisplay.pR8 = NULL; + m_RegDisplay.pR9 = NULL; + m_RegDisplay.pR10 = NULL; + m_RegDisplay.pR11 = NULL; + +#elif defined(TARGET_X86) || defined(TARGET_AMD64) + // + // preserved regs + // + m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rbp); + m_RegDisplay.pRsi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rsi); + m_RegDisplay.pRdi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rdi); + m_RegDisplay.pRbx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rbx); +#ifdef TARGET_AMD64 + m_RegDisplay.pR12 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R12); + m_RegDisplay.pR13 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R13); + m_RegDisplay.pR14 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R14); + m_RegDisplay.pR15 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, R15); + // + // preserved xmm regs + // + memcpy(m_RegDisplay.Xmm, &pCtx->Xmm6, sizeof(m_RegDisplay.Xmm)); +#endif // TARGET_AMD64 + + // + // scratch regs + // + m_RegDisplay.pRax = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pCtx, Rax); + m_RegDisplay.pRcx = NULL; + m_RegDisplay.pRdx = NULL; +#ifdef TARGET_AMD64 + m_RegDisplay.pR8 = NULL; + m_RegDisplay.pR9 = NULL; + m_RegDisplay.pR10 = NULL; + m_RegDisplay.pR11 = NULL; +#endif // TARGET_AMD64 +#else + PORTABILITY_ASSERT("StackFrameIterator::InternalInit"); +#endif // TARGET_ARM +} + +PTR_VOID StackFrameIterator::HandleExCollide(PTR_ExInfo pExInfo) +{ + STRESS_LOG3(LF_STACKWALK, LL_INFO10000, " [ ex collide ] kind = %d, pass = %d, idxCurClause = %d\n", + pExInfo->m_kind, pExInfo->m_passNumber, pExInfo->m_idxCurClause); + + PTR_VOID collapsingTargetFrame = NULL; + UInt32 curFlags = m_dwFlags; + + // Capture and clear the pending funclet frame pointer (if any). This field is only set + // when stack walks collide with active exception dispatch, and only exists to save the + // funclet frame pointer until the next ExInfo collision (which has now occurred). + PTR_VOID activeFuncletFramePointer = m_pendingFuncletFramePointer; + m_pendingFuncletFramePointer = NULL; + + // If we aren't invoking a funclet (i.e. idxCurClause == -1), and we're doing a GC stackwalk, we don't + // want the 2nd-pass collided behavior because that behavior assumes that the previous frame was a + // funclet, which isn't the case when taking a GC at some points in the EH dispatch code. So we treat it + // as if the 2nd pass hasn't actually started yet. 
+ if ((pExInfo->m_passNumber == 1) || + (pExInfo->m_idxCurClause == 0xFFFFFFFF)) + { + FAILFAST_OR_DAC_FAIL_MSG(!(curFlags & ApplyReturnAddressAdjustment), + "did not expect to collide with a 1st-pass ExInfo during a EH stackwalk"); + InternalInit(m_pThread, pExInfo->m_pExContext, curFlags); + m_pNextExInfo = pExInfo->m_pPrevExInfo; + CalculateCurrentMethodState(); + ASSERT(IsValid()); + + if ((pExInfo->m_kind & EK_HardwareFault) && (curFlags & RemapHardwareFaultsToSafePoint)) + m_effectiveSafePointAddress = GetCodeManager()->RemapHardwareFaultToGCSafePoint(&m_methodInfo, m_ControlPC); + } + else + { + ASSERT_MSG(activeFuncletFramePointer != NULL, + "collided with an active funclet invoke but the funclet frame pointer is unknown"); + + // + // Copy our state from the previous StackFrameIterator + // + this->UpdateFromExceptionDispatch((PTR_StackFrameIterator)&pExInfo->m_frameIter); + + // Sync our 'current' ExInfo with the updated state (we may have skipped other dispatches) + ResetNextExInfoForSP(m_RegDisplay.GetSP()); + + // In case m_ControlPC is pre-adjusted, counteract here, since the caller of this routine + // will apply the adjustment again once we return. If the m_ControlPC is not pre-adjusted, + // this is simply an no-op. + m_ControlPC = m_OriginalControlPC; + + m_dwFlags = curFlags; + + // The iterator has been moved to the "owner frame" (either a parent funclet or the main + // code body) of the funclet being invoked by this ExInfo. As a result, both the active + // funclet and the current frame must be "part of the same function" and therefore must + // have identical frame pointer values. + + CalculateCurrentMethodState(); + ASSERT(IsValid()); + ASSERT(m_FramePointer == activeFuncletFramePointer); + + if ((m_ControlPC != 0) && // the dispatch in ExInfo could have gone unhandled + (m_dwFlags & CollapseFunclets)) + { + // GC stack walks must skip the owner frame since GC information for the entire function + // has already been reported by the leafmost active funclet. In general, the GC stack walk + // must skip all parent frames that are "part of the same function" (i.e., have the same + // frame pointer). + collapsingTargetFrame = activeFuncletFramePointer; + } + } + return collapsingTargetFrame; +} + +void StackFrameIterator::UpdateFromExceptionDispatch(PTR_StackFrameIterator pSourceIterator) +{ + ASSERT(m_pendingFuncletFramePointer == NULL); + PreservedRegPtrs thisFuncletPtrs = this->m_funcletPtrs; + + // Blast over 'this' with everything from the 'source'. + *this = *pSourceIterator; + + // Clear the funclet frame pointer (if any) that was loaded from the previous iterator. + // This field does not relate to the transferrable state of the previous iterator (it + // instead tracks the frame-by-frame progression of a particular iterator instance) and + // therefore has no meaning in the context of the current stack walk. + m_pendingFuncletFramePointer = NULL; + + // Then, put back the pointers to the funclet's preserved registers (since those are the correct values + // until the funclet completes, at which point the values will be copied back to the ExInfo's REGDISPLAY). 
+ +#ifdef TARGET_ARM + m_RegDisplay.pR4 = thisFuncletPtrs.pR4 ; + m_RegDisplay.pR5 = thisFuncletPtrs.pR5 ; + m_RegDisplay.pR6 = thisFuncletPtrs.pR6 ; + m_RegDisplay.pR7 = thisFuncletPtrs.pR7 ; + m_RegDisplay.pR8 = thisFuncletPtrs.pR8 ; + m_RegDisplay.pR9 = thisFuncletPtrs.pR9 ; + m_RegDisplay.pR10 = thisFuncletPtrs.pR10; + m_RegDisplay.pR11 = thisFuncletPtrs.pR11; + +#elif defined(TARGET_ARM64) + m_RegDisplay.pX19 = thisFuncletPtrs.pX19; + m_RegDisplay.pX20 = thisFuncletPtrs.pX20; + m_RegDisplay.pX21 = thisFuncletPtrs.pX21; + m_RegDisplay.pX22 = thisFuncletPtrs.pX22; + m_RegDisplay.pX23 = thisFuncletPtrs.pX23; + m_RegDisplay.pX24 = thisFuncletPtrs.pX24; + m_RegDisplay.pX25 = thisFuncletPtrs.pX25; + m_RegDisplay.pX26 = thisFuncletPtrs.pX26; + m_RegDisplay.pX27 = thisFuncletPtrs.pX27; + m_RegDisplay.pX28 = thisFuncletPtrs.pX28; + m_RegDisplay.pFP = thisFuncletPtrs.pFP; + +#elif defined(UNIX_AMD64_ABI) + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_RegDisplay.pRbp = thisFuncletPtrs.pRbp; + m_RegDisplay.pRbx = thisFuncletPtrs.pRbx; + m_RegDisplay.pR12 = thisFuncletPtrs.pR12; + m_RegDisplay.pR13 = thisFuncletPtrs.pR13; + m_RegDisplay.pR14 = thisFuncletPtrs.pR14; + m_RegDisplay.pR15 = thisFuncletPtrs.pR15; + +#elif defined(TARGET_X86) || defined(TARGET_AMD64) + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_RegDisplay.pRbp = thisFuncletPtrs.pRbp; + m_RegDisplay.pRdi = thisFuncletPtrs.pRdi; + m_RegDisplay.pRsi = thisFuncletPtrs.pRsi; + m_RegDisplay.pRbx = thisFuncletPtrs.pRbx; +#ifdef TARGET_AMD64 + m_RegDisplay.pR12 = thisFuncletPtrs.pR12; + m_RegDisplay.pR13 = thisFuncletPtrs.pR13; + m_RegDisplay.pR14 = thisFuncletPtrs.pR14; + m_RegDisplay.pR15 = thisFuncletPtrs.pR15; +#endif // TARGET_AMD64 +#else + PORTABILITY_ASSERT("StackFrameIterator::UpdateFromExceptionDispatch"); +#endif +} + +#ifdef TARGET_AMD64 +typedef DPTR(Fp128) PTR_Fp128; +#endif + +// The invoke of a funclet is a bit special and requires an assembly thunk, but we don't want to break the +// stackwalk due to this. So this routine will unwind through the assembly thunks used to invoke funclets. +// It's also used to disambiguate exceptionally- and non-exceptionally-invoked funclets. +void StackFrameIterator::UnwindFuncletInvokeThunk() +{ + ASSERT((m_dwFlags & MethodStateCalculated) == 0); + +#if defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: Currently no funclet invoke defined in a portable way + return; +#else // defined(USE_PORTABLE_HELPERS) + ASSERT(CategorizeUnadjustedReturnAddress(m_ControlPC) == InFuncletInvokeThunk); + + PTR_UIntNative SP; + +#ifdef TARGET_X86 + // First, unwind RhpCallFunclet + SP = (PTR_UIntNative)(m_RegDisplay.SP + 0x4); // skip the saved assembly-routine-EBP + m_RegDisplay.SetAddrOfIP(SP); + m_RegDisplay.SetIP(*SP++); + m_RegDisplay.SetSP((UIntNative)dac_cast(SP)); + SetControlPC(dac_cast(*(m_RegDisplay.pIP))); + + ASSERT( + EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) || + EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallFinallyFunclet2) || + EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallFilterFunclet2) + ); +#endif + + bool isFilterInvoke = EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallFilterFunclet2); + +#if defined(UNIX_AMD64_ABI) + SP = (PTR_UIntNative)(m_RegDisplay.SP); + + if (isFilterInvoke) + { + SP++; // stack alignment + } + else + { + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. 
+ m_funcletPtrs.pRbp = m_RegDisplay.pRbp; + m_funcletPtrs.pRbx = m_RegDisplay.pRbx; + m_funcletPtrs.pR12 = m_RegDisplay.pR12; + m_funcletPtrs.pR13 = m_RegDisplay.pR13; + m_funcletPtrs.pR14 = m_RegDisplay.pR14; + m_funcletPtrs.pR15 = m_RegDisplay.pR15; + + if (EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2)) + { + SP += 6 + 1; // 6 locals and stack alignment + } + else + { + SP += 3; // 3 locals + } + } + + m_RegDisplay.pRbp = SP++; + m_RegDisplay.pRbx = SP++; + m_RegDisplay.pR12 = SP++; + m_RegDisplay.pR13 = SP++; + m_RegDisplay.pR14 = SP++; + m_RegDisplay.pR15 = SP++; +#elif defined(TARGET_AMD64) + static const int ArgumentsScratchAreaSize = 4 * 8; + + PTR_Fp128 xmm = (PTR_Fp128)(m_RegDisplay.SP + ArgumentsScratchAreaSize); + + for (int i = 0; i < 10; i++) + { + m_RegDisplay.Xmm[i] = *xmm++; + } + + SP = (PTR_UIntNative)xmm; + + if (isFilterInvoke) + { + SP++; // stack alignment + } + else + { + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_funcletPtrs.pRbp = m_RegDisplay.pRbp; + m_funcletPtrs.pRdi = m_RegDisplay.pRdi; + m_funcletPtrs.pRsi = m_RegDisplay.pRsi; + m_funcletPtrs.pRbx = m_RegDisplay.pRbx; + m_funcletPtrs.pR12 = m_RegDisplay.pR12; + m_funcletPtrs.pR13 = m_RegDisplay.pR13; + m_funcletPtrs.pR14 = m_RegDisplay.pR14; + m_funcletPtrs.pR15 = m_RegDisplay.pR15; + + if (EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2)) + { + SP += 3; // 3 locals + } + else + { + SP++; // 1 local + } + } + + m_RegDisplay.pRbp = SP++; + m_RegDisplay.pRdi = SP++; + m_RegDisplay.pRsi = SP++; + m_RegDisplay.pRbx = SP++; + m_RegDisplay.pR12 = SP++; + m_RegDisplay.pR13 = SP++; + m_RegDisplay.pR14 = SP++; + m_RegDisplay.pR15 = SP++; + +#elif defined(TARGET_X86) + SP = (PTR_UIntNative)(m_RegDisplay.SP); + + if (!isFilterInvoke) + { + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_funcletPtrs.pRbp = m_RegDisplay.pRbp; + m_funcletPtrs.pRdi = m_RegDisplay.pRdi; + m_funcletPtrs.pRsi = m_RegDisplay.pRsi; + m_funcletPtrs.pRbx = m_RegDisplay.pRbx; + } + + if (EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2)) + { + SP += 2; // 2 locals + } + else + { + SP++; // 1 local + } + m_RegDisplay.pRdi = SP++; + m_RegDisplay.pRsi = SP++; + m_RegDisplay.pRbx = SP++; + m_RegDisplay.pRbp = SP++; +#elif defined(TARGET_ARM) + + PTR_UInt64 d = (PTR_UInt64)(m_RegDisplay.SP); + + for (int i = 0; i < 8; i++) + { + m_RegDisplay.D[i] = *d++; + } + + SP = (PTR_UIntNative)d; + + if (!isFilterInvoke) + { + // RhpCallCatchFunclet puts a couple of extra things on the stack that aren't put there by the other two + // thunks, but we don't need to know what they are here, so we just skip them. + SP += EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) ? 3 : 1; + + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. 
+ m_funcletPtrs.pR4 = m_RegDisplay.pR4; + m_funcletPtrs.pR5 = m_RegDisplay.pR5; + m_funcletPtrs.pR6 = m_RegDisplay.pR6; + m_funcletPtrs.pR7 = m_RegDisplay.pR7; + m_funcletPtrs.pR8 = m_RegDisplay.pR8; + m_funcletPtrs.pR9 = m_RegDisplay.pR9; + m_funcletPtrs.pR10 = m_RegDisplay.pR10; + m_funcletPtrs.pR11 = m_RegDisplay.pR11; + } + + m_RegDisplay.pR4 = SP++; + m_RegDisplay.pR5 = SP++; + m_RegDisplay.pR6 = SP++; + m_RegDisplay.pR7 = SP++; + m_RegDisplay.pR8 = SP++; + m_RegDisplay.pR9 = SP++; + m_RegDisplay.pR10 = SP++; + m_RegDisplay.pR11 = SP++; + +#elif defined(TARGET_ARM64) + PTR_UInt64 d = (PTR_UInt64)(m_RegDisplay.SP); + + for (int i = 0; i < 8; i++) + { + m_RegDisplay.D[i] = *d++; + } + + SP = (PTR_UIntNative)d; + + if (!isFilterInvoke) + { + // RhpCallCatchFunclet puts a couple of extra things on the stack that aren't put there by the other two + // thunks, but we don't need to know what they are here, so we just skip them. + SP += EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) ? 4 : 2; + + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_funcletPtrs.pX19 = m_RegDisplay.pX19; + m_funcletPtrs.pX20 = m_RegDisplay.pX20; + m_funcletPtrs.pX21 = m_RegDisplay.pX21; + m_funcletPtrs.pX22 = m_RegDisplay.pX22; + m_funcletPtrs.pX23 = m_RegDisplay.pX23; + m_funcletPtrs.pX24 = m_RegDisplay.pX24; + m_funcletPtrs.pX25 = m_RegDisplay.pX25; + m_funcletPtrs.pX26 = m_RegDisplay.pX26; + m_funcletPtrs.pX27 = m_RegDisplay.pX27; + m_funcletPtrs.pX28 = m_RegDisplay.pX28; + m_funcletPtrs.pFP = m_RegDisplay.pFP; + } + + m_RegDisplay.pFP = SP++; + + m_RegDisplay.SetAddrOfIP((PTR_PCODE)SP); + m_RegDisplay.SetIP(*SP++); + + m_RegDisplay.pX19 = SP++; + m_RegDisplay.pX20 = SP++; + m_RegDisplay.pX21 = SP++; + m_RegDisplay.pX22 = SP++; + m_RegDisplay.pX23 = SP++; + m_RegDisplay.pX24 = SP++; + m_RegDisplay.pX25 = SP++; + m_RegDisplay.pX26 = SP++; + m_RegDisplay.pX27 = SP++; + m_RegDisplay.pX28 = SP++; + +#else + SP = (PTR_UIntNative)(m_RegDisplay.SP); + ASSERT_UNCONDITIONALLY("NYI for this arch"); +#endif + +#if !defined(TARGET_ARM64) + m_RegDisplay.SetAddrOfIP((PTR_PCODE)SP); + m_RegDisplay.SetIP(*SP++); +#endif + + m_RegDisplay.SetSP((UIntNative)dac_cast(SP)); + SetControlPC(dac_cast(*(m_RegDisplay.pIP))); + + // We expect to be called by the runtime's C# EH implementation, and since this function's notion of how + // to unwind through the stub is brittle relative to the stub itself, we want to check as soon as we can. + ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC) && "unwind from funclet invoke stub failed"); +#endif // defined(USE_PORTABLE_HELPERS) +} + +// For a given target architecture, the layout of this structure must precisely match the +// stack frame layout used by the associated architecture-specific RhpUniversalTransition +// implementation. +struct UniversalTransitionStackFrame +{ + +// In DAC builds, the "this" pointer refers to an object in the DAC host. +#define GET_POINTER_TO_FIELD(_FieldName) \ + (PTR_UIntNative)PTR_HOST_MEMBER(UniversalTransitionStackFrame, this, _FieldName) + +#if defined(UNIX_AMD64_ABI) + + // Conservative GC reporting must be applied to everything between the base of the + // ReturnBlock and the top of the StackPassedArgs. 
+private: + Fp128 m_fpArgRegs[8]; // ChildSP+000 CallerSP-0D0 (0x80 bytes) (xmm0-xmm7) + UIntNative m_returnBlock[2]; // ChildSP+080 CallerSP-050 (0x10 bytes) + UIntNative m_intArgRegs[6]; // ChildSP+090 CallerSP-040 (0x30 bytes) (rdi,rsi,rcx,rdx,r8,r9) + UIntNative m_alignmentPad; // ChildSP+0C0 CallerSP-010 (0x8 bytes) + UIntNative m_callerRetaddr; // ChildSP+0C8 CallerSP-008 (0x8 bytes) + UIntNative m_stackPassedArgs[1]; // ChildSP+0D0 CallerSP+000 (unknown size) + +public: + PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } + PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_callerRetaddr); } + PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + // RhpUniversalTransition does not touch any non-volatile state on amd64. + UNREFERENCED_PARAMETER(pRegisterSet); + } + +#elif defined(TARGET_AMD64) + + // Conservative GC reporting must be applied to everything between the base of the + // ReturnBlock and the top of the StackPassedArgs. +private: + UIntNative m_calleeArgumentHomes[4]; // ChildSP+000 CallerSP-080 (0x20 bytes) + Fp128 m_fpArgRegs[4]; // ChildSP+020 CallerSP-060 (0x40 bytes) (xmm0-xmm3) + UIntNative m_returnBlock[2]; // ChildSP+060 CallerSP-020 (0x10 bytes) + UIntNative m_alignmentPad; // ChildSP+070 CallerSP-010 (0x8 bytes) + UIntNative m_callerRetaddr; // ChildSP+078 CallerSP-008 (0x8 bytes) + UIntNative m_intArgRegs[4]; // ChildSP+080 CallerSP+000 (0x20 bytes) (rcx,rdx,r8,r9) + UIntNative m_stackPassedArgs[1]; // ChildSP+0a0 CallerSP+020 (unknown size) + +public: + PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_intArgRegs[0]); } + PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_callerRetaddr); } + PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + // RhpUniversalTransition does not touch any non-volatile state on amd64. + UNREFERENCED_PARAMETER(pRegisterSet); + } + +#elif defined(TARGET_ARM) + + // Conservative GC reporting must be applied to everything between the base of the + // ReturnBlock and the top of the StackPassedArgs. +private: + UIntNative m_pushedR11; // ChildSP+000 CallerSP-078 (0x4 bytes) (r11) + UIntNative m_pushedLR; // ChildSP+004 CallerSP-074 (0x4 bytes) (lr) + UInt64 m_fpArgRegs[8]; // ChildSP+008 CallerSP-070 (0x40 bytes) (d0-d7) + UInt64 m_returnBlock[4]; // ChildSP+048 CallerSP-030 (0x20 bytes) + UIntNative m_intArgRegs[4]; // ChildSP+068 CallerSP-010 (0x10 bytes) (r0-r3) + UIntNative m_stackPassedArgs[1]; // ChildSP+078 CallerSP+000 (unknown size) + +public: + PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } + PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_pushedLR); } + PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + pRegisterSet->pR11 = GET_POINTER_TO_FIELD(m_pushedR11); + } + +#elif defined(TARGET_X86) + + // Conservative GC reporting must be applied to everything between the base of the + // IntArgRegs and the top of the StackPassedArgs. 
+private: + UIntNative m_intArgRegs[2]; // ChildSP+000 CallerSP-018 (0x8 bytes) (edx,ecx) + UIntNative m_returnBlock[2]; // ChildSP+008 CallerSP-010 (0x8 bytes) + UIntNative m_pushedEBP; // ChildSP+010 CallerSP-008 (0x4 bytes) + UIntNative m_callerRetaddr; // ChildSP+014 CallerSP-004 (0x4 bytes) + UIntNative m_stackPassedArgs[1]; // ChildSP+018 CallerSP+000 (unknown size) + +public: + PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } + PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_callerRetaddr); } + PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_intArgRegs[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + pRegisterSet->pRbp = GET_POINTER_TO_FIELD(m_pushedEBP); + } + +#elif defined(TARGET_ARM64) + + // Conservative GC reporting must be applied to everything between the base of the + // ReturnBlock and the top of the StackPassedArgs. +private: + UIntNative m_pushedFP; // ChildSP+000 CallerSP-0C0 (0x08 bytes) (fp) + UIntNative m_pushedLR; // ChildSP+008 CallerSP-0B8 (0x08 bytes) (lr) + UInt64 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0B0 (0x40 bytes) (d0-d7) + UIntNative m_returnBlock[4]; // ChildSP+050 CallerSP-070 (0x40 bytes) + UIntNative m_intArgRegs[9]; // ChildSP+070 CallerSP-050 (0x48 bytes) (x0-x8) + UIntNative m_alignmentPad; // ChildSP+0B8 CallerSP-008 (0x08 bytes) + UIntNative m_stackPassedArgs[1]; // ChildSP+0C0 CallerSP+000 (unknown size) + +public: + PTR_UIntNative get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } + PTR_UIntNative get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_pushedLR); } + PTR_UIntNative get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + pRegisterSet->pFP = GET_POINTER_TO_FIELD(m_pushedFP); + } +#elif defined(TARGET_WASM) +private: + // WASMTODO: #error NYI for this arch + UIntNative m_stackPassedArgs[1]; // Placeholder +public: + PTR_UIntNative get_CallerSP() { PORTABILITY_ASSERT("@TODO: FIXME:WASM"); return NULL; } + PTR_UIntNative get_AddressOfPushedCallerIP() { PORTABILITY_ASSERT("@TODO: FIXME:WASM"); return NULL; } + PTR_UIntNative get_LowerBoundForConservativeReporting() { PORTABILITY_ASSERT("@TODO: FIXME:WASM"); return NULL; } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + UNREFERENCED_PARAMETER(pRegisterSet); + PORTABILITY_ASSERT("@TODO: FIXME:WASM"); + } +#else +#error NYI for this arch +#endif + +#undef GET_POINTER_TO_FIELD + +}; + +typedef DPTR(UniversalTransitionStackFrame) PTR_UniversalTransitionStackFrame; + +// NOTE: This function always publishes a non-NULL conservative stack range lower bound. +// +// NOTE: In x86 cases, the unwound callsite often uses a calling convention that expects some amount +// of stack-passed argument space to be callee-popped before control returns (or unwinds) to the +// callsite. Since the callsite signature (and thus the amount of callee-popped space) is unknown, +// the recovered SP does not account for the callee-popped space is therefore "wrong" for the +// purposes of unwind. This implies that any x86 function which calls into RhpUniversalTransition +// must have a frame pointer to ensure that the incorrect SP value is ignored and does not break the +// unwind. 
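As a sanity check, the ChildSP/CallerSP offsets quoted in the layout comments above can be verified offline. The following standalone sketch mirrors the TARGET_X86 arrangement with uint32_t standing in for the 32-bit UIntNative; the mirror type and its name are hypothetical and exist only to make the documented offsets checkable with static_assert, it is not part of this change:

#include <cstddef>
#include <cstdint>

// Hypothetical 32-bit mirror of the TARGET_X86 UniversalTransitionStackFrame layout,
// used only to confirm the offsets quoted in the comments (ChildSP+000/008/010/014/018).
struct X86UniversalTransitionFrameMirror
{
    uint32_t m_intArgRegs[2];      // ChildSP+000  (edx,ecx)
    uint32_t m_returnBlock[2];     // ChildSP+008
    uint32_t m_pushedEBP;          // ChildSP+010
    uint32_t m_callerRetaddr;      // ChildSP+014
    uint32_t m_stackPassedArgs[1]; // ChildSP+018  (conservative reporting runs up to here)
};

static_assert(offsetof(X86UniversalTransitionFrameMirror, m_returnBlock)     == 0x08, "layout comment mismatch");
static_assert(offsetof(X86UniversalTransitionFrameMirror, m_pushedEBP)       == 0x10, "layout comment mismatch");
static_assert(offsetof(X86UniversalTransitionFrameMirror, m_callerRetaddr)   == 0x14, "layout comment mismatch");
static_assert(offsetof(X86UniversalTransitionFrameMirror, m_stackPassedArgs) == 0x18, "layout comment mismatch");

The same spot-check extends to the other architectures provided the stand-in field types match the real sizes (for example, a 16-byte aligned type in place of Fp128).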
+void StackFrameIterator::UnwindUniversalTransitionThunk() +{ + ASSERT((m_dwFlags & MethodStateCalculated) == 0); + +#if defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: Corresponding helper code is only defined in assembly code + return; +#else // defined(USE_PORTABLE_HELPERS) + ASSERT(CategorizeUnadjustedReturnAddress(m_ControlPC) == InUniversalTransitionThunk); + + // The current PC is within RhpUniversalTransition, so establish a view of the surrounding stack frame. + // NOTE: In DAC builds, the pointer will refer to a newly constructed object in the DAC host. + UniversalTransitionStackFrame * stackFrame = (PTR_UniversalTransitionStackFrame)m_RegDisplay.SP; + + stackFrame->UnwindNonVolatileRegisters(&m_RegDisplay); + + PTR_UIntNative addressOfPushedCallerIP = stackFrame->get_AddressOfPushedCallerIP(); + m_RegDisplay.SetAddrOfIP((PTR_PCODE)addressOfPushedCallerIP); + m_RegDisplay.SetIP(*addressOfPushedCallerIP); + m_RegDisplay.SetSP((UIntNative)dac_cast(stackFrame->get_CallerSP())); + SetControlPC(dac_cast(*(m_RegDisplay.pIP))); + + // All universal transition cases rely on conservative GC reporting being applied to the + // full argument set that flowed into the call. Report the lower bound of this range (the + // caller will compute the upper bound). + PTR_UIntNative pLowerBound = stackFrame->get_LowerBoundForConservativeReporting(); + ASSERT(pLowerBound != NULL); + ASSERT(m_pConservativeStackRangeLowerBound == NULL); + m_pConservativeStackRangeLowerBound = pLowerBound; +#endif // defined(USE_PORTABLE_HELPERS) +} + +#ifdef TARGET_AMD64 +#define STACK_ALIGN_SIZE 16 +#elif defined(TARGET_ARM) +#define STACK_ALIGN_SIZE 8 +#elif defined(TARGET_ARM64) +#define STACK_ALIGN_SIZE 16 +#elif defined(TARGET_X86) +#define STACK_ALIGN_SIZE 4 +#elif defined(TARGET_WASM) +#define STACK_ALIGN_SIZE 4 +#endif + +#ifdef TARGET_AMD64 +struct CALL_DESCR_CONTEXT +{ + UIntNative Rbp; + UIntNative Rsi; + UIntNative Rbx; + UIntNative IP; +}; +#elif defined(TARGET_ARM) +struct CALL_DESCR_CONTEXT +{ + UIntNative R4; + UIntNative R5; + UIntNative R7; + UIntNative IP; +}; +#elif defined(TARGET_ARM64) +struct CALL_DESCR_CONTEXT +{ + UIntNative FP; + UIntNative IP; + UIntNative X19; + UIntNative X20; +}; +#elif defined(TARGET_X86) +struct CALL_DESCR_CONTEXT +{ + UIntNative Rbx; + UIntNative Rbp; + UIntNative IP; +}; +#elif defined (TARGET_WASM) +struct CALL_DESCR_CONTEXT +{ + UIntNative IP; +}; +#else +#error NYI - For this arch +#endif + +typedef DPTR(CALL_DESCR_CONTEXT) PTR_CALL_DESCR_CONTEXT; + +void StackFrameIterator::UnwindCallDescrThunk() +{ + ASSERT((m_dwFlags & MethodStateCalculated) == 0); + +#if defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: Corresponding helper code is only defined in assembly code + return; +#else // defined(USE_PORTABLE_HELPERS) + ASSERT(CategorizeUnadjustedReturnAddress(m_ControlPC) == InCallDescrThunk); + + UIntNative newSP; +#ifdef TARGET_AMD64 + // RBP points to the SP that we want to capture. 
(This arrangement allows for + // the arguments from this function to be loaded into memory with an adjustment + // to SP, like an alloca + newSP = *(PTR_UIntNative)m_RegDisplay.pRbp; + + PTR_CALL_DESCR_CONTEXT pContext = (PTR_CALL_DESCR_CONTEXT)newSP; + + m_RegDisplay.pRbp = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, Rbp); + m_RegDisplay.pRsi = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, Rsi); + m_RegDisplay.pRbx = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, Rbx); + + // And adjust SP to be the state that it should be in just after returning from + // the CallDescrFunction + newSP += sizeof(CALL_DESCR_CONTEXT); +#elif defined(TARGET_ARM) + // R7 points to the SP that we want to capture. (This arrangement allows for + // the arguments from this function to be loaded into memory with an adjustment + // to SP, like an alloca + newSP = *(PTR_UIntNative)m_RegDisplay.pR7; + PTR_CALL_DESCR_CONTEXT pContext = (PTR_CALL_DESCR_CONTEXT)newSP; + + m_RegDisplay.pR4 = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, R4); + m_RegDisplay.pR5 = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, R5); + m_RegDisplay.pR7 = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, R7); + + // And adjust SP to be the state that it should be in just after returning from + // the CallDescrFunction + newSP += sizeof(CALL_DESCR_CONTEXT); + +#elif defined(TARGET_ARM64) + // pFP points to the SP that we want to capture. (This arrangement allows for + // the arguments from this function to be loaded into memory with an adjustment + // to SP, like an alloca + newSP = *(PTR_UIntNative)m_RegDisplay.pFP; + PTR_CALL_DESCR_CONTEXT pContext = (PTR_CALL_DESCR_CONTEXT)newSP; + + m_RegDisplay.pX19 = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, X19); + m_RegDisplay.pX20 = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, X20); + + // And adjust SP to be the state that it should be in just after returning from + // the CallDescrFunction + newSP += sizeof(CALL_DESCR_CONTEXT); + +#elif defined(TARGET_X86) + // RBP points to the SP that we want to capture. 
(This arrangement allows for + // the arguments from this function to be loaded into memory with an adjustment + // to SP, like an alloca + newSP = *(PTR_UIntNative)m_RegDisplay.pRbp; + + PTR_CALL_DESCR_CONTEXT pContext = (PTR_CALL_DESCR_CONTEXT)(newSP - offsetof(CALL_DESCR_CONTEXT, Rbp)); + + m_RegDisplay.pRbp = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, Rbp); + m_RegDisplay.pRbx = PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, Rbx); + + // And adjust SP to be the state that it should be in just after returning from + // the CallDescrFunction + newSP += sizeof(CALL_DESCR_CONTEXT) - offsetof(CALL_DESCR_CONTEXT, Rbp); + +#else + PORTABILITY_ASSERT("UnwindCallDescrThunk"); + PTR_CALL_DESCR_CONTEXT pContext = NULL; +#endif + + m_RegDisplay.SetAddrOfIP(PTR_TO_MEMBER(CALL_DESCR_CONTEXT, pContext, IP)); + m_RegDisplay.SetIP(pContext->IP); + m_RegDisplay.SetSP(newSP); + SetControlPC(dac_cast(pContext->IP)); + +#endif // defined(USE_PORTABLE_HELPERS) +} + +void StackFrameIterator::UnwindThrowSiteThunk() +{ + ASSERT((m_dwFlags & MethodStateCalculated) == 0); + +#if defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: no portable version of throw helpers + return; +#else // defined(USE_PORTABLE_HELPERS) + ASSERT(CategorizeUnadjustedReturnAddress(m_ControlPC) == InThrowSiteThunk); + + const UIntNative STACKSIZEOF_ExInfo = ((sizeof(ExInfo) + (STACK_ALIGN_SIZE-1)) & ~(STACK_ALIGN_SIZE-1)); +#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) + const UIntNative SIZEOF_OutgoingScratch = 0x20; +#else + const UIntNative SIZEOF_OutgoingScratch = 0; +#endif + + PTR_PAL_LIMITED_CONTEXT pContext = (PTR_PAL_LIMITED_CONTEXT) + (m_RegDisplay.SP + SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo); + +#if defined(UNIX_AMD64_ABI) + m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbp); + m_RegDisplay.pRbx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbx); + m_RegDisplay.pR12 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R12); + m_RegDisplay.pR13 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R13); + m_RegDisplay.pR14 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R14); + m_RegDisplay.pR15 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R15); +#elif defined(TARGET_AMD64) + m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbp); + m_RegDisplay.pRdi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rdi); + m_RegDisplay.pRsi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rsi); + m_RegDisplay.pRbx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbx); + m_RegDisplay.pR12 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R12); + m_RegDisplay.pR13 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R13); + m_RegDisplay.pR14 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R14); + m_RegDisplay.pR15 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R15); +#elif defined(TARGET_ARM) + m_RegDisplay.pR4 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R4); + m_RegDisplay.pR5 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R5); + m_RegDisplay.pR6 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R6); + m_RegDisplay.pR7 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R7); + m_RegDisplay.pR8 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R8); + m_RegDisplay.pR9 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R9); + m_RegDisplay.pR10 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R10); + m_RegDisplay.pR11 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, R11); +#elif defined(TARGET_ARM64) + m_RegDisplay.pX19 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X19); + m_RegDisplay.pX20 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X20); + 
m_RegDisplay.pX21 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X21); + m_RegDisplay.pX22 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X22); + m_RegDisplay.pX23 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X23); + m_RegDisplay.pX24 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X24); + m_RegDisplay.pX25 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X25); + m_RegDisplay.pX26 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X26); + m_RegDisplay.pX27 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X27); + m_RegDisplay.pX28 = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, X28); + m_RegDisplay.pFP = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, FP); +#elif defined(TARGET_X86) + m_RegDisplay.pRbp = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbp); + m_RegDisplay.pRdi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rdi); + m_RegDisplay.pRsi = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rsi); + m_RegDisplay.pRbx = PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, Rbx); +#else + ASSERT_UNCONDITIONALLY("NYI for this arch"); +#endif + + m_RegDisplay.SetAddrOfIP(PTR_TO_MEMBER(PAL_LIMITED_CONTEXT, pContext, IP)); + m_RegDisplay.SetIP(pContext->IP); + m_RegDisplay.SetSP(pContext->GetSp()); + SetControlPC(dac_cast(pContext->IP)); + + // We expect the throw site to be in managed code, and since this function's notion of how to unwind + // through the stub is brittle relative to the stub itself, we want to check as soon as we can. + ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC) && "unwind from throw site stub failed"); +#endif // defined(USE_PORTABLE_HELPERS) +} + +bool StackFrameIterator::IsValid() +{ + return (m_ControlPC != 0); +} + +void StackFrameIterator::Next() +{ + NextInternal(); + STRESS_LOG1(LF_STACKWALK, LL_INFO10000, " %p\n", m_ControlPC); +} + +void StackFrameIterator::NextInternal() +{ +UnwindOutOfCurrentManagedFrame: + ASSERT(m_dwFlags & MethodStateCalculated); + m_dwFlags &= ~(ExCollide|MethodStateCalculated|UnwoundReversePInvoke); + ASSERT(IsValid()); + + m_pHijackedReturnValue = NULL; + m_HijackedReturnValueKind = GCRK_Unknown; + +#ifdef _DEBUG + SetControlPC(dac_cast((void*)666)); +#endif // _DEBUG + + // Clear any preceding published conservative range. The current unwind will compute a new range + // from scratch if one is needed. + m_pConservativeStackRangeLowerBound = NULL; + m_pConservativeStackRangeUpperBound = NULL; + +#if defined(_DEBUG) && !defined(DACCESS_COMPILE) + UIntNative DEBUG_preUnwindSP = m_RegDisplay.GetSP(); +#endif + + PTR_VOID pPreviousTransitionFrame; + FAILFAST_OR_DAC_FAIL(GetCodeManager()->UnwindStackFrame(&m_methodInfo, &m_RegDisplay, &pPreviousTransitionFrame)); + + bool doingFuncletUnwind = GetCodeManager()->IsFunclet(&m_methodInfo); + + if (pPreviousTransitionFrame != NULL) + { + ASSERT(!doingFuncletUnwind); + + if (pPreviousTransitionFrame == TOP_OF_STACK_MARKER) + { + SetControlPC(0); + } + else + { + // NOTE: If this is an EH stack walk, then reinitializing the iterator using the GC stack + // walk flags is incorrect. That said, this is OK because the exception dispatcher will + // immediately trigger a failfast when it sees the UnwoundReversePInvoke flag. + // NOTE: This can generate a conservative stack range if the recovered PInvoke callsite + // resides in an assembly thunk and not in normal managed code. In this case InternalInit + // will unwind through the thunk and back to the nearest managed frame, and therefore may + // see a conservative range reported by one of the thunks encountered during this "nested" + // unwind. 
+            InternalInit(m_pThread, GetPInvokeTransitionFrame(pPreviousTransitionFrame), GcStackWalkFlags);
+            ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC));
+        }
+        m_dwFlags |= UnwoundReversePInvoke;
+    }
+    else
+    {
+        // if the thread is safe to walk, it better not have a hijack in place.
+        ASSERT((ThreadStore::GetCurrentThread() == m_pThread) || !m_pThread->DangerousCrossThreadIsHijacked());
+
+        SetControlPC(dac_cast(*(m_RegDisplay.GetAddrOfIP())));
+
+        PTR_VOID collapsingTargetFrame = NULL;
+
+        // Starting from the unwound return address, unwind further (if needed) until reaching
+        // either the next managed frame (i.e., the next frame that should be yielded from the
+        // stack frame iterator) or a collision point that requires complex handling.
+
+        bool exCollide = false;
+        ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC);
+
+        if (doingFuncletUnwind)
+        {
+            ASSERT(m_pendingFuncletFramePointer == NULL);
+            ASSERT(m_FramePointer != NULL);
+
+            if (category == InFuncletInvokeThunk)
+            {
+                // The iterator is unwinding out of an exceptionally invoked funclet. Before proceeding,
+                // record the funclet frame pointer so that the iterator can verify that the remainder of
+                // the stack walk encounters "owner frames" (i.e., parent funclets or the main code body)
+                // in the expected order.
+                // NOTE: m_pendingFuncletFramePointer will be cleared by HandleExCollide when the stack walk
+                // collides with the ExInfo that invoked this funclet.
+                m_pendingFuncletFramePointer = m_FramePointer;
+
+                // Unwind through the funclet invoke assembly thunk to reach the topmost managed frame in
+                // the exception dispatch code. All non-GC stack walks collide at this point (whereas GC
+                // stack walks collide at the throw site which is reached after processing all of the
+                // exception dispatch frames).
+                UnwindFuncletInvokeThunk();
+                if (!(m_dwFlags & CollapseFunclets))
+                {
+                    exCollide = true;
+                }
+            }
+            else if (category == InManagedCode)
+            {
+                // Non-exceptionally invoked funclet case. The caller is processed as a normal managed
+                // frame, with the caveat that funclet collapsing must be applied in GC stack walks (since
+                // the caller is either a parent funclet or the main code body and the leafmost funclet
+                // already provided GC information for the entire function).
+                if (m_dwFlags & CollapseFunclets)
+                {
+                    collapsingTargetFrame = m_FramePointer;
+                }
+            }
+            else
+            {
+                FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("Unexpected thunk encountered when unwinding out of a funclet.");
+            }
+        }
+        else if (category != InManagedCode)
+        {
+            // Unwinding the current (non-funclet) managed frame revealed that its caller is one of the
+            // well-known assembly thunks. Unwind through the thunk to find the next managed frame
+            // that should be yielded from the stack frame iterator.
+            // NOTE: It is generally possible for a sequence of multiple thunks to appear "on top of
+            // each other" on the stack (e.g., the CallDescrThunk can be used to invoke the
+            // UniversalTransitionThunk), but EH thunks can never appear in such sequences.
+
+            if (IsNonEHThunk(category))
+            {
+                // Unwind the current sequence of one or more thunks until the next managed frame is reached.
+                // NOTE: This can generate a conservative stack range if one or more of the thunks in the
+                // sequence report a conservative lower bound.
+                UnwindNonEHThunkSequence();
+            }
+            else if (category == InThrowSiteThunk)
+            {
+                // EH stack walks collide at the funclet invoke thunk and are never expected to encounter
+                // throw sites (except in illegal cases such as exceptions escaping from the managed
+                // exception dispatch code itself).
+                FAILFAST_OR_DAC_FAIL_MSG(!(m_dwFlags & ApplyReturnAddressAdjustment),
+                    "EH stack walk is attempting to propagate an exception across a throw site.");
+
+                UnwindThrowSiteThunk();
+
+                if (m_dwFlags & CollapseFunclets)
+                {
+                    UIntNative postUnwindSP = m_RegDisplay.SP;
+
+                    if (m_pNextExInfo && (postUnwindSP > ((UIntNative)dac_cast(m_pNextExInfo))))
+                    {
+                        // This GC stack walk has processed all managed exception frames associated with the
+                        // current throw site, meaning it has now collided with the associated ExInfo.
+                        exCollide = true;
+                    }
+                }
+            }
+            else
+            {
+                FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("Unexpected thunk encountered when unwinding out of a non-funclet.");
+            }
+        }
+
+        if (exCollide)
+        {
+            // OK, so we just hit (collided with) an exception throw point. We continue by consulting the
+            // ExInfo.
+
+            // In the GC stackwalk, this means walking all the way off the end of the managed exception
+            // dispatch code to the throw site. In the EH stackwalk, this means hitting the special funclet
+            // invoke ASM thunks.
+
+            // Double-check that the ExInfo that is being consulted is at or below the 'current' stack pointer
+            ASSERT(DEBUG_preUnwindSP <= (UIntNative)m_pNextExInfo);
+
+            ASSERT(collapsingTargetFrame == NULL);
+
+            collapsingTargetFrame = HandleExCollide(m_pNextExInfo);
+        }
+
+        // Now that all assembly thunks and ExInfo collisions have been processed, it is guaranteed
+        // that the next managed frame has been located. The located frame must now be yielded
+        // from the iterator with the one and only exception being cases where a managed frame must
+        // be skipped due to funclet collapsing.
+
+        ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC));
+
+        if (collapsingTargetFrame != NULL)
+        {
+            // The iterator is positioned on a parent funclet or main code body in a function where GC
+            // information has already been reported by the leafmost funclet, implying that the current
+            // frame needs to be skipped by the GC stack walk. In general, the GC stack walk must skip
+            // all parent frames that are "part of the same function" (i.e., have the same frame
+            // pointer).
+            ASSERT(m_dwFlags & CollapseFunclets);
+            CalculateCurrentMethodState();
+            ASSERT(IsValid());
+            FAILFAST_OR_DAC_FAIL(m_FramePointer == collapsingTargetFrame);
+
+            // Fail if the skipped frame has no associated conservative stack range (since any
+            // attached stack range is about to be dropped without ever being reported to the GC).
+            // This should never happen since funclet collapsing cases are only triggered when
+            // unwinding out of managed frames and never when unwinding out of the thunks that report
+            // conservative ranges.
+            FAILFAST_OR_DAC_FAIL(m_pConservativeStackRangeLowerBound == NULL);
+
+            STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "[ KeepUnwinding ]\n");
+            goto UnwindOutOfCurrentManagedFrame;
+        }
+
+        // Before yielding this frame, indicate that it was located via an ExInfo collision as
+        // opposed to normal unwind.
+        if (exCollide)
+            m_dwFlags |= ExCollide;
+    }
+
+    // At this point, the iterator is in an invalid state if there are no more managed frames
+    // on the current stack, and is otherwise positioned on the next managed frame to yield to
+    // the caller.
+ PrepareToYieldFrame(); +} + +// NOTE: This function will publish a non-NULL conservative stack range lower bound if and +// only if one or more of the thunks in the sequence report conservative stack ranges. +void StackFrameIterator::UnwindNonEHThunkSequence() +{ + ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC); + ASSERT(IsNonEHThunk(category)); + + // Unwind the current sequence of thunks until the next managed frame is reached, being + // careful to detect and aggregate any conservative stack ranges reported by the thunks. + PTR_UIntNative pLowestLowerBound = NULL; + PTR_UIntNative pPrecedingLowerBound = NULL; + while (category != InManagedCode) + { + ASSERT(m_pConservativeStackRangeLowerBound == NULL); + + if (category == InCallDescrThunk) + { + UnwindCallDescrThunk(); + } + else if (category == InUniversalTransitionThunk) + { + UnwindUniversalTransitionThunk(); + ASSERT(m_pConservativeStackRangeLowerBound != NULL); + } + else + { + FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("Unexpected thunk encountered when unwinding a non-EH thunk sequence."); + } + + if (m_pConservativeStackRangeLowerBound != NULL) + { + // The newly unwound thunk reported a conservative stack range lower bound. The thunk + // sequence being unwound needs to generate a single conservative range that will be + // reported along with the managed frame eventually yielded by the iterator. To ensure + // sufficient reporting, this range always extends from the first (i.e., lowest) lower + // bound all the way to the top of the outgoing arguments area in the next managed frame. + // This aggregate range therefore covers all intervening thunk frames (if any), and also + // covers all necessary conservative ranges in the pathological case where a sequence of + // thunks contains multiple frames which report distinct conservative lower bound values. + // + // Capture the initial lower bound, and assert that the lower bound values are compatible + // with the "aggregate range" approach described above (i.e., that they never exceed the + // unwound thunk's stack frame and are always larger than all previously encountered lower + // bound values). + + if (pLowestLowerBound == NULL) + pLowestLowerBound = m_pConservativeStackRangeLowerBound; + + FAILFAST_OR_DAC_FAIL(m_pConservativeStackRangeLowerBound < (PTR_UIntNative)m_RegDisplay.SP); + FAILFAST_OR_DAC_FAIL(m_pConservativeStackRangeLowerBound > pPrecedingLowerBound); + pPrecedingLowerBound = m_pConservativeStackRangeLowerBound; + m_pConservativeStackRangeLowerBound = NULL; + } + + category = CategorizeUnadjustedReturnAddress(m_ControlPC); + } + + // The iterator has reached the next managed frame. Publish the computed lower bound value. + ASSERT(m_pConservativeStackRangeLowerBound == NULL); + m_pConservativeStackRangeLowerBound = pLowestLowerBound; +} + +// This function is called immediately before a given frame is yielded from the iterator +// (i.e., before a given frame is exposed outside of the iterator). At yield points, +// iterator must either be invalid (indicating that all managed frames have been processed) +// or must describe a valid managed frame. In the latter case, some common postprocessing +// steps must always be applied before the frame is exposed outside of the iterator. 
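To make the "aggregate range" rule in UnwindNonEHThunkSequence above concrete, here is a minimal standalone sketch; the helper name and the std::vector input are hypothetical stand-ins for the iterator's internal state, not runtime code:

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative only: keep the first (lowest) conservative lower bound reported by a
// sequence of thunk frames, asserting that each later bound sits higher on the stack.
uintptr_t AggregateConservativeLowerBound(const std::vector<uintptr_t>& perThunkBounds)
{
    uintptr_t lowest = 0;
    uintptr_t preceding = 0;
    for (uintptr_t bound : perThunkBounds)
    {
        if (lowest == 0)
            lowest = bound;        // first bound encountered; the walk moves upward, so it is the lowest
        assert(bound > preceding); // later thunks must report strictly higher bounds
        preceding = bound;
    }
    return lowest;                 // 0 means no thunk in the sequence reported a bound
}

Only the first bound needs to survive because the walk proceeds upward through the thunk sequence, so the first reported bound is necessarily the lowest and the single published range covers every intervening thunk frame.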
+void StackFrameIterator::PrepareToYieldFrame() +{ + if (!IsValid()) + return; + + ASSERT(m_pInstance->FindCodeManagerByAddress(m_ControlPC)); + + bool atDebuggerHijackSite = (this->m_ControlPC == (PTR_VOID)(TADDR)DebugFuncEval::GetMostRecentFuncEvalHijackInstructionPointer()); + + if (atDebuggerHijackSite) + { + FAILFAST_OR_DAC_FAIL_MSG(m_pConservativeStackRangeLowerBound != NULL, + "Debugger hijack unwind is missing the required conservative range from a preceding transition thunk."); + } + + if (m_dwFlags & ApplyReturnAddressAdjustment) + { + FAILFAST_OR_DAC_FAIL_MSG(!atDebuggerHijackSite, + "EH stack walk is attempting to propagate an exception across a debugger hijack site."); + + m_ControlPC = AdjustReturnAddressBackward(m_ControlPC); + } + + m_ShouldSkipRegularGcReporting = false; + + // Each time a managed frame is yielded, configure the iterator to explicitly indicate + // whether or not unwinding to the current frame has revealed a stack range that must be + // conservatively reported by the GC. + if ((m_pConservativeStackRangeLowerBound != NULL) && (m_dwFlags & CollapseFunclets)) + { + // Conservatively reported stack ranges always correspond to the full extent of the + // argument set (including stack-passed arguments and spilled argument registers) that + // flowed into a managed callsite which called into the runtime. The runtime has no + // knowledge of the callsite signature in these cases, and unwind through these callsites + // is only possible via the associated assembly thunk (e.g., the ManagedCalloutThunk or + // UniversalTransitionThunk). + // + // The iterator is currently positioned on the managed frame which contains the callsite of + // interest. The lower bound of the argument set was already computed while unwinding + // through the assembly thunk. The upper bound of the argument set is always at or below + // the top of the outgoing arguments area in the current managed frame (i.e., in the + // managed frame which contains the callsite). + // + // Compute a conservative upper bound and then publish the total range so that it can be + // observed by the current GC stack walk (via HasStackRangeToReportConservatively). Note + // that the upper bound computation never mutates m_RegDisplay. + CalculateCurrentMethodState(); + ASSERT(IsValid()); + + if (!atDebuggerHijackSite) + { + UIntNative rawUpperBound = GetCodeManager()->GetConservativeUpperBoundForOutgoingArgs(&m_methodInfo, &m_RegDisplay); + m_pConservativeStackRangeUpperBound = (PTR_UIntNative)rawUpperBound; + } + else + { + // Debugger hijack points differ from all other unwind cases in that they are not + // guaranteed to be GC safe points, which implies that regular GC reporting will not + // protect the GC references that the function was using (in registers and/or in local + // stack slots) at the time of the hijack. + // + // GC references held in registers at the time of the hijack are reported by the + // debugger and therefore do not need to be handled here. (The debugger does this by + // conservatively reporting the entire CONTEXT record which lists the full register + // set that was observed when the thread was stopped at the hijack point.) + // + // This code is therefore only responsible for reporting the GC references that were + // stored on the stack at the time of the hijack. Conceptually, this is done by + // conservatively reporting the entire stack frame. 
Since debugger hijack unwind
+            // always occurs via a UniversalTransitionThunk, the conservative lower bound
+            // published by the thunk can be used as a workable lower bound for the entire stack
+            // frame.
+            //
+            // Computing a workable upper bound is more difficult, especially because the stack
+            // frame of a funclet can contain FP-relative locals which reside arbitrarily far up
+            // the stack compared to the current SP. The top of the thread's stack is currently
+            // used as an extremely conservative upper bound as a way to cover all cases without
+            // introducing more stack walker complexity.
+
+            PTR_VOID pStackLow;
+            PTR_VOID pStackHigh;
+#ifndef DACCESS_COMPILE
+            m_pThread->GetStackBounds(&pStackLow, &pStackHigh);
+#endif
+            m_pConservativeStackRangeUpperBound = (PTR_UIntNative)pStackHigh;
+            m_ShouldSkipRegularGcReporting = true;
+        }
+
+        ASSERT(m_pConservativeStackRangeLowerBound != NULL);
+        ASSERT(m_pConservativeStackRangeUpperBound != NULL);
+        ASSERT(m_pConservativeStackRangeUpperBound > m_pConservativeStackRangeLowerBound);
+    }
+    else
+    {
+        m_pConservativeStackRangeLowerBound = NULL;
+        m_pConservativeStackRangeUpperBound = NULL;
+    }
+}
+
+REGDISPLAY * StackFrameIterator::GetRegisterSet()
+{
+    ASSERT(IsValid());
+    return &m_RegDisplay;
+}
+
+PTR_VOID StackFrameIterator::GetEffectiveSafePointAddress()
+{
+    ASSERT(IsValid());
+    return m_effectiveSafePointAddress;
+}
+
+PTR_ICodeManager StackFrameIterator::GetCodeManager()
+{
+    ASSERT(IsValid());
+    return m_pCodeManager;
+}
+
+MethodInfo * StackFrameIterator::GetMethodInfo()
+{
+    ASSERT(IsValid());
+    return &m_methodInfo;
+}
+
+#ifdef DACCESS_COMPILE
+#define FAILFAST_OR_DAC_RETURN_FALSE(x) if(!(x)) return false;
+#else
+#define FAILFAST_OR_DAC_RETURN_FALSE(x) if(!(x)) { ASSERT_UNCONDITIONALLY(#x); RhFailFast(); }
+#endif
+
+void StackFrameIterator::CalculateCurrentMethodState()
+{
+    if (m_dwFlags & MethodStateCalculated)
+        return;
+
+    // Assume that the caller is likely to be in the same module
+    if (m_pCodeManager == NULL || !m_pCodeManager->FindMethodInfo(m_ControlPC, &m_methodInfo))
+    {
+        m_pCodeManager = dac_cast(m_pInstance->FindCodeManagerByAddress(m_ControlPC));
+        FAILFAST_OR_DAC_FAIL(m_pCodeManager);
+
+        FAILFAST_OR_DAC_FAIL(m_pCodeManager->FindMethodInfo(m_ControlPC, &m_methodInfo));
+    }
+
+    m_effectiveSafePointAddress = m_ControlPC;
+    m_FramePointer = GetCodeManager()->GetFramePointer(&m_methodInfo, &m_RegDisplay);
+
+    m_dwFlags |= MethodStateCalculated;
+}
+
+bool StackFrameIterator::GetHijackedReturnValueLocation(PTR_RtuObjectRef * pLocation, GCRefKind * pKind)
+{
+    if (GCRK_Unknown == m_HijackedReturnValueKind)
+        return false;
+
+    ASSERT((GCRK_Scalar < m_HijackedReturnValueKind) && (m_HijackedReturnValueKind <= GCRK_LastValid));
+
+    *pLocation = m_pHijackedReturnValue;
+    *pKind = m_HijackedReturnValueKind;
+    return true;
+}
+
+void StackFrameIterator::SetControlPC(PTR_VOID controlPC)
+{
+    m_OriginalControlPC = m_ControlPC = controlPC;
+}
+
+bool StackFrameIterator::IsNonEHThunk(ReturnAddressCategory category)
+{
+    switch (category)
+    {
+        default:
+            return false;
+        case InUniversalTransitionThunk:
+        case InCallDescrThunk:
+            return true;
+    }
+}
+
+bool StackFrameIterator::IsValidReturnAddress(PTR_VOID pvAddress)
+{
+    // These are return addresses into functions that call into managed (non-funclet) code, so we might see
+    // them as hijacked return addresses.
+ ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(pvAddress); + + // All non-EH thunks call out to normal managed code, implying that return addresses into + // them can be hijacked. + if (IsNonEHThunk(category)) + return true; + + // Throw site thunks call out to managed code, but control never returns from the managed + // callee. As a result, return addresses into these thunks can be hijacked, but the + // hijacks will never execute. + if (category == InThrowSiteThunk) + return true; + + return (NULL != GetRuntimeInstance()->FindCodeManagerByAddress(pvAddress)); +} + +// Support for conservatively reporting GC references in a stack range. This is used when managed methods with +// an unknown signature potentially including GC references call into the runtime and we need to let a GC +// proceed (typically because we call out into managed code again). Instead of storing signature metadata for +// every possible managed method that might make such a call we identify a small range of the stack that might +// contain outgoing arguments. We then report every pointer that looks like it might refer to the GC heap as a +// fixed interior reference. + +bool StackFrameIterator::HasStackRangeToReportConservatively() +{ + // When there's no range to report both the lower and upper bounds will be NULL. + return IsValid() && (m_pConservativeStackRangeUpperBound != NULL); +} + +void StackFrameIterator::GetStackRangeToReportConservatively(PTR_RtuObjectRef * ppLowerBound, PTR_RtuObjectRef * ppUpperBound) +{ + ASSERT(HasStackRangeToReportConservatively()); + *ppLowerBound = (PTR_RtuObjectRef)m_pConservativeStackRangeLowerBound; + *ppUpperBound = (PTR_RtuObjectRef)m_pConservativeStackRangeUpperBound; +} + +PTR_VOID StackFrameIterator::AdjustReturnAddressBackward(PTR_VOID controlPC) +{ +#ifdef TARGET_ARM + return (PTR_VOID)(((PTR_UInt8)controlPC) - 2); +#elif defined(TARGET_ARM64) + return (PTR_VOID)(((PTR_UInt8)controlPC) - 4); +#else + return (PTR_VOID)(((PTR_UInt8)controlPC) - 1); +#endif +} + +// Given a return address, determine the category of function where it resides. In +// general, return addresses encountered by the stack walker are required to reside in +// managed code unless they reside in one of the well-known assembly thunks. 
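As a rough illustration of what the consumer of the published range does (this is not the runtime's actual GC interface; the callback type and heap-bounds test below are stand-ins), conservative reporting amounts to treating every pointer-sized slot in [lower, upper) whose value lands inside the GC heap as a pinned interior reference:

#include <cstdint>

// Hypothetical callback and heap-bounds check, for illustration only.
typedef void (*ReportPinnedInteriorRef)(void** pSlot);

void ScanRangeConservatively(uintptr_t* pLowerBound, uintptr_t* pUpperBound,
                             uintptr_t gcHeapLow, uintptr_t gcHeapHigh,
                             ReportPinnedInteriorRef report)
{
    for (uintptr_t* pSlot = pLowerBound; pSlot < pUpperBound; pSlot++)
    {
        uintptr_t value = *pSlot;

        // No signature information is available for the callsite, so anything that might
        // point into the GC heap is reported conservatively as a pinned interior reference.
        if (value >= gcHeapLow && value < gcHeapHigh)
            report(reinterpret_cast<void**>(pSlot));
    }
}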
+ +// static +StackFrameIterator::ReturnAddressCategory StackFrameIterator::CategorizeUnadjustedReturnAddress(PTR_VOID returnAddress) +{ +#if defined(USE_PORTABLE_HELPERS) // @TODO: CORERT: no portable thunks are defined + + return InManagedCode; + +#else // defined(USE_PORTABLE_HELPERS) + +#if defined(FEATURE_DYNAMIC_CODE) + if (EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromCallDescrThunk)) + { + return InCallDescrThunk; + } + else if (EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransition) || + EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransition_DebugStepTailCall)) + { + return InUniversalTransitionThunk; + } +#endif + + if (EQUALS_RETURN_ADDRESS(returnAddress, RhpThrowEx2) || + EQUALS_RETURN_ADDRESS(returnAddress, RhpThrowHwEx2) || + EQUALS_RETURN_ADDRESS(returnAddress, RhpRethrow2)) + { + return InThrowSiteThunk; + } + + if ( +#ifdef TARGET_X86 + EQUALS_RETURN_ADDRESS(returnAddress, RhpCallFunclet2) +#else + EQUALS_RETURN_ADDRESS(returnAddress, RhpCallCatchFunclet2) || + EQUALS_RETURN_ADDRESS(returnAddress, RhpCallFinallyFunclet2) || + EQUALS_RETURN_ADDRESS(returnAddress, RhpCallFilterFunclet2) +#endif + ) + { + return InFuncletInvokeThunk; + } + + return InManagedCode; +#endif // defined(USE_PORTABLE_HELPERS) +} + +bool StackFrameIterator::ShouldSkipRegularGcReporting() +{ + return m_ShouldSkipRegularGcReporting; +} + +#ifndef DACCESS_COMPILE + +COOP_PINVOKE_HELPER(Boolean, RhpSfiInit, (StackFrameIterator* pThis, PAL_LIMITED_CONTEXT* pStackwalkCtx, Boolean instructionFault)) +{ + Thread * pCurThread = ThreadStore::GetCurrentThread(); + + // The stackwalker is intolerant to hijacked threads, as it is largely expecting to be called from C++ + // where the hijack state of the thread is invariant. Because we've exposed the iterator out to C#, we + // need to unhijack every time we callback into C++ because the thread could have been hijacked during our + // time executing C#. + pCurThread->Unhijack(); + + // Passing NULL is a special-case to request a standard managed stack trace for the current thread. + if (pStackwalkCtx == NULL) + pThis->InternalInitForStackTrace(); + else + pThis->InternalInitForEH(pCurThread, pStackwalkCtx, instructionFault); + + bool isValid = pThis->IsValid(); + if (isValid) + pThis->CalculateCurrentMethodState(); + return isValid ? Boolean_true : Boolean_false; +} + +COOP_PINVOKE_HELPER(Boolean, RhpSfiNext, (StackFrameIterator* pThis, UInt32* puExCollideClauseIdx, Boolean* pfUnwoundReversePInvoke)) +{ + // The stackwalker is intolerant to hijacked threads, as it is largely expecting to be called from C++ + // where the hijack state of the thread is invariant. Because we've exposed the iterator out to C#, we + // need to unhijack every time we callback into C++ because the thread could have been hijacked during our + // time executing C#. + ThreadStore::GetCurrentThread()->Unhijack(); + + const UInt32 MaxTryRegionIdx = 0xFFFFFFFF; + + ExInfo * pCurExInfo = pThis->m_pNextExInfo; + pThis->Next(); + bool isValid = pThis->IsValid(); + if (isValid) + pThis->CalculateCurrentMethodState(); + + if (pThis->m_dwFlags & StackFrameIterator::ExCollide) + { + ASSERT(pCurExInfo->m_idxCurClause != MaxTryRegionIdx); + *puExCollideClauseIdx = pCurExInfo->m_idxCurClause; + pCurExInfo->m_kind = (ExKind)(pCurExInfo->m_kind | EK_SupersededFlag); + } + else + { + *puExCollideClauseIdx = MaxTryRegionIdx; + } + + *pfUnwoundReversePInvoke = (pThis->m_dwFlags & StackFrameIterator::UnwoundReversePInvoke) + ? 
Boolean_true + : Boolean_false; + return isValid; +} + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.h new file mode 100644 index 0000000000000..fe28f53f3ff89 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/StackFrameIterator.h @@ -0,0 +1,211 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "ICodeManager.h" + +struct ExInfo; +typedef DPTR(ExInfo) PTR_ExInfo; +typedef VPTR(ICodeManager) PTR_ICodeManager; + +enum ExKind : UInt8 +{ + EK_HardwareFault = 2, + EK_SupersededFlag = 8, +}; + +struct EHEnum +{ + ICodeManager * m_pCodeManager; + EHEnumState m_state; +}; + +EXTERN_C Boolean FASTCALL RhpSfiInit(StackFrameIterator* pThis, PAL_LIMITED_CONTEXT* pStackwalkCtx, Boolean instructionFault); +EXTERN_C Boolean FASTCALL RhpSfiNext(StackFrameIterator* pThis, UInt32* puExCollideClauseIdx, Boolean* pfUnwoundReversePInvoke); + +struct PInvokeTransitionFrame; +typedef DPTR(PInvokeTransitionFrame) PTR_PInvokeTransitionFrame; +typedef DPTR(PAL_LIMITED_CONTEXT) PTR_PAL_LIMITED_CONTEXT; + +class StackFrameIterator +{ + friend class AsmOffsets; + friend Boolean FASTCALL RhpSfiInit(StackFrameIterator* pThis, PAL_LIMITED_CONTEXT* pStackwalkCtx, Boolean instructionFault); + friend Boolean FASTCALL RhpSfiNext(StackFrameIterator* pThis, UInt32* puExCollideClauseIdx, Boolean* pfUnwoundReversePInvoke); + +public: + StackFrameIterator() {} + StackFrameIterator(Thread * pThreadToWalk, PTR_VOID pInitialTransitionFrame); + StackFrameIterator(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx); + + + bool IsValid(); + void CalculateCurrentMethodState(); + void Next(); + PTR_VOID GetEffectiveSafePointAddress(); + REGDISPLAY * GetRegisterSet(); + PTR_ICodeManager GetCodeManager(); + MethodInfo * GetMethodInfo(); + bool GetHijackedReturnValueLocation(PTR_RtuObjectRef * pLocation, GCRefKind * pKind); + void SetControlPC(PTR_VOID controlPC); + + static bool IsValidReturnAddress(PTR_VOID pvAddress); + + // Support for conservatively reporting GC references in a stack range. This is used when managed methods + // with an unknown signature potentially including GC references call into the runtime and we need to let + // a GC proceed (typically because we call out into managed code again). Instead of storing signature + // metadata for every possible managed method that might make such a call we identify a small range of the + // stack that might contain outgoing arguments. We then report every pointer that looks like it might + // refer to the GC heap as a fixed interior reference. + bool HasStackRangeToReportConservatively(); + void GetStackRangeToReportConservatively(PTR_RtuObjectRef * ppLowerBound, PTR_RtuObjectRef * ppUpperBound); + + // Debugger Hijacked frame looks very much like a usual managed frame except when the + // frame must be reported conservatively, and when that happens, regular GC reporting should be skipped + bool ShouldSkipRegularGcReporting(); + +private: + // The invoke of a funclet is a bit special and requires an assembly thunk, but we don't want to break the + // stackwalk due to this. So this routine will unwind through the assembly thunks used to invoke funclets. + // It's also used to disambiguate exceptionally- and non-exceptionally-invoked funclets. 
+ void UnwindFuncletInvokeThunk(); + void UnwindThrowSiteThunk(); + + // If our control PC indicates that we're in the universal transition thunk that we use to generically + // dispatch arbitrary managed calls, then handle the stack walk specially. + // NOTE: This function always publishes a non-NULL conservative stack range lower bound. + void UnwindUniversalTransitionThunk(); + + // If our control PC indicates that we're in the call descr thunk that we use to call an arbitrary managed + // function with an arbitrary signature from a normal managed function handle the stack walk specially. + void UnwindCallDescrThunk(); + + void EnterInitialInvalidState(Thread * pThreadToWalk); + + void InternalInit(Thread * pThreadToWalk, PTR_PInvokeTransitionFrame pFrame, UInt32 dwFlags); // GC stackwalk + void InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx, UInt32 dwFlags); // EH and hijack stackwalk, and collided unwind + void InternalInitForEH(Thread * pThreadToWalk, PAL_LIMITED_CONTEXT * pCtx, bool instructionFault); // EH stackwalk + void InternalInitForStackTrace(); // Environment.StackTrace + + PTR_VOID HandleExCollide(PTR_ExInfo pExInfo); + void NextInternal(); + + // This will walk m_pNextExInfo from its current value until it finds the next ExInfo at a higher address + // than the SP reference value passed in. This is useful when 'restarting' the stackwalk from a + // particular PInvokeTransitionFrame or after we have a 'collided unwind' that may skip over ExInfos. + void ResetNextExInfoForSP(UIntNative SP); + + void UpdateFromExceptionDispatch(PTR_StackFrameIterator pSourceIterator); + + // helpers to ApplyReturnAddressAdjustment + PTR_VOID AdjustReturnAddressForward(PTR_VOID controlPC); + PTR_VOID AdjustReturnAddressBackward(PTR_VOID controlPC); + + void UnwindNonEHThunkSequence(); + void PrepareToYieldFrame(); + + enum ReturnAddressCategory + { + InManagedCode, + InThrowSiteThunk, + InFuncletInvokeThunk, + InCallDescrThunk, + InUniversalTransitionThunk, + }; + + static ReturnAddressCategory CategorizeUnadjustedReturnAddress(PTR_VOID returnAddress); + static bool IsNonEHThunk(ReturnAddressCategory category); + + enum Flags + { + // If this flag is set, each unwind will apply a -1 to the ControlPC. This is used by EH to ensure + // that the ControlPC of a callsite stays within the containing try region. + ApplyReturnAddressAdjustment = 1, + + // Used by the GC stackwalk, this flag will ensure that multiple funclet frames for a given method + // activation will be given only one callback. The one callback is given for the most nested physical + // stack frame of a given activation of a method. (i.e. the leafmost funclet) + CollapseFunclets = 2, + + // This is a state returned by Next() which indicates that we just crossed an ExInfo in our unwind. + ExCollide = 4, + + // If a hardware fault frame is encountered, report its control PC at the binder-inserted GC safe + // point immediately after the prolog of the most nested enclosing try-region's handler. 
+ RemapHardwareFaultsToSafePoint = 8, + + MethodStateCalculated = 0x10, + + // This is a state returned by Next() which indicates that we just unwound a reverse pinvoke method + UnwoundReversePInvoke = 0x20, + + GcStackWalkFlags = (CollapseFunclets | RemapHardwareFaultsToSafePoint), + EHStackWalkFlags = ApplyReturnAddressAdjustment, + StackTraceStackWalkFlags = GcStackWalkFlags + }; + + struct PreservedRegPtrs + { +#ifdef TARGET_ARM + PTR_UIntNative pR4; + PTR_UIntNative pR5; + PTR_UIntNative pR6; + PTR_UIntNative pR7; + PTR_UIntNative pR8; + PTR_UIntNative pR9; + PTR_UIntNative pR10; + PTR_UIntNative pR11; +#elif defined(TARGET_ARM64) + PTR_UIntNative pX19; + PTR_UIntNative pX20; + PTR_UIntNative pX21; + PTR_UIntNative pX22; + PTR_UIntNative pX23; + PTR_UIntNative pX24; + PTR_UIntNative pX25; + PTR_UIntNative pX26; + PTR_UIntNative pX27; + PTR_UIntNative pX28; + PTR_UIntNative pFP; +#elif defined(UNIX_AMD64_ABI) + PTR_UIntNative pRbp; + PTR_UIntNative pRbx; + PTR_UIntNative pR12; + PTR_UIntNative pR13; + PTR_UIntNative pR14; + PTR_UIntNative pR15; +#else // TARGET_ARM + PTR_UIntNative pRbp; + PTR_UIntNative pRdi; + PTR_UIntNative pRsi; + PTR_UIntNative pRbx; +#ifdef TARGET_AMD64 + PTR_UIntNative pR12; + PTR_UIntNative pR13; + PTR_UIntNative pR14; + PTR_UIntNative pR15; +#endif // TARGET_AMD64 +#endif // TARGET_ARM + }; + +protected: + Thread * m_pThread; + RuntimeInstance * m_pInstance; + PTR_VOID m_FramePointer; + PTR_VOID m_ControlPC; + REGDISPLAY m_RegDisplay; + PTR_ICodeManager m_pCodeManager; + MethodInfo m_methodInfo; + PTR_VOID m_effectiveSafePointAddress; + PTR_RtuObjectRef m_pHijackedReturnValue; + GCRefKind m_HijackedReturnValueKind; + PTR_UIntNative m_pConservativeStackRangeLowerBound; + PTR_UIntNative m_pConservativeStackRangeUpperBound; + UInt32 m_dwFlags; + PTR_ExInfo m_pNextExInfo; + PTR_VOID m_pendingFuncletFramePointer; + PreservedRegPtrs m_funcletPtrs; // @TODO: Placing the 'scratch space' in the StackFrameIterator is not + // preferred because not all StackFrameIterators require this storage + // space. However, the implementation simpler by doing it this way. + bool m_ShouldSkipRegularGcReporting; + PTR_VOID m_OriginalControlPC; +}; + diff --git a/src/coreclr/src/nativeaot/Runtime/SyncClean.cpp b/src/coreclr/src/nativeaot/Runtime/SyncClean.cpp new file mode 100644 index 0000000000000..927d3f5bf307a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/SyncClean.cpp @@ -0,0 +1,29 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "SpinLock.h" +#include "rhbinder.h" +#include "CachedInterfaceDispatch.h" + +#include "SyncClean.hpp" + +void SyncClean::Terminate() +{ + CleanUp(); +} + +void SyncClean::CleanUp () +{ +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + // Update any interface dispatch caches that were unsafe to modify outside of this GC. + ReclaimUnusedInterfaceDispatchCaches(); +#endif +} diff --git a/src/coreclr/src/nativeaot/Runtime/SyncClean.hpp b/src/coreclr/src/nativeaot/Runtime/SyncClean.hpp new file mode 100644 index 0000000000000..c9ea16263075e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/SyncClean.hpp @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _SYNCCLEAN_HPP_ +#define _SYNCCLEAN_HPP_ + +// We keep a list of memory blocks to be freed at the end of GC, but before we resume EE. +// To make this work, we need to make sure that these data are accessed in cooperative GC +// mode. + +class SyncClean { +public: + static void Terminate (); + static void CleanUp (); +}; + +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/src/nativeaot/Runtime/ThunksMapping.cpp new file mode 100644 index 0000000000000..7076a5af30fbe --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/ThunksMapping.cpp @@ -0,0 +1,266 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "CommonMacros.inl" +#include "volatile.h" +#include "PalRedhawk.h" +#include "rhassert.h" + + +#ifdef FEATURE_RX_THUNKS + +#ifdef TARGET_AMD64 +#define THUNK_SIZE 20 +#elif TARGET_X86 +#define THUNK_SIZE 12 +#elif TARGET_ARM +#define THUNK_SIZE 20 +#else +#define THUNK_SIZE (2 * OS_PAGE_SIZE) // This will cause RhpGetNumThunksPerBlock to return 0 +#endif + +static_assert((THUNK_SIZE % 4) == 0, "Thunk stubs size not aligned correctly. This will cause runtime failures."); + +#define THUNKS_MAP_SIZE 0x8000 // 32 K + +#ifdef TARGET_ARM +//***************************************************************************** +// Encode a 16-bit immediate mov/movt in ARM Thumb2 Instruction (format T2_N) +//***************************************************************************** +void EncodeThumb2Mov16(UInt16 * pCode, UInt16 value, UInt8 rDestination, bool topWord) +{ + pCode[0] = ((topWord ? 
0xf2c0 : 0xf240) | + ((value >> 12) & 0x000f) | + ((value >> 1) & 0x0400)); + pCode[1] = (((value << 4) & 0x7000) | + (value & 0x00ff) | + (rDestination << 8)); +} + +//***************************************************************************** +// Encode a 32-bit immediate mov in ARM Thumb2 Instruction (format T2_N) +//***************************************************************************** +void EncodeThumb2Mov32(UInt16 * pCode, UInt32 value, UInt8 rDestination) +{ + EncodeThumb2Mov16(pCode, (UInt16)(value & 0x0000ffff), rDestination, false); + EncodeThumb2Mov16(pCode + 2, (UInt16)(value >> 16), rDestination, true); +} +#endif + +COOP_PINVOKE_HELPER(int, RhpGetNumThunkBlocksPerMapping, ()) +{ + static_assert((THUNKS_MAP_SIZE % OS_PAGE_SIZE) == 0, "Thunks map size should be in multiples of pages"); + + return THUNKS_MAP_SIZE / OS_PAGE_SIZE; +} + +COOP_PINVOKE_HELPER(int, RhpGetNumThunksPerBlock, ()) +{ + return min( + OS_PAGE_SIZE / THUNK_SIZE, // Number of thunks that can fit in a page + (OS_PAGE_SIZE - POINTER_SIZE) / (POINTER_SIZE * 2) // Number of pointer pairs, minus the jump stub cell, that can fit in a page + ); +} + +COOP_PINVOKE_HELPER(int, RhpGetThunkSize, ()) +{ + return THUNK_SIZE; +} + +COOP_PINVOKE_HELPER(void*, RhpGetThunkDataBlockAddress, (void* pThunkStubAddress)) +{ + return (void*)(((UIntNative)pThunkStubAddress & ~(OS_PAGE_SIZE - 1)) + THUNKS_MAP_SIZE); +} + +COOP_PINVOKE_HELPER(void*, RhpGetThunkStubsBlockAddress, (void* pThunkDataAddress)) +{ + return (void*)(((UIntNative)pThunkDataAddress & ~(OS_PAGE_SIZE - 1)) - THUNKS_MAP_SIZE); +} + +COOP_PINVOKE_HELPER(int, RhpGetThunkBlockSize, ()) +{ + return OS_PAGE_SIZE; +} + +EXTERN_C REDHAWK_API void* __cdecl RhAllocateThunksMapping() +{ +#ifdef WIN32 + + void * pNewMapping = PalVirtualAlloc(NULL, THUNKS_MAP_SIZE * 2, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + if (pNewMapping == NULL) + return NULL; + + void * pThunksSection = pNewMapping; + void * pDataSection = (UInt8*)pNewMapping + THUNKS_MAP_SIZE; + +#else + + // Note: On secure linux systems, we can't add execute permissions to a mapped virtual memory if it was not created + // with execute permissions in the first place. This is why we create the virtual section with RX permissions, then + // reduce it to RW for the data section and RX for the stubs section after generating the stubs instructions. 
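+    // Layout reserved below: one contiguous region of 2 * THUNKS_MAP_SIZE bytes, split into
+    //
+    //     [pThunksSection, pThunksSection + THUNKS_MAP_SIZE)  - executable stub pages
+    //     [pDataSection,   pDataSection   + THUNKS_MAP_SIZE)  - writable per-thunk data pages
+    //
+    // Each stub page pairs with the data page THUNKS_MAP_SIZE bytes above it, which is the
+    // relationship RhpGetThunkDataBlockAddress/RhpGetThunkStubsBlockAddress rely on, e.g.:
+    //
+    //     uint8_t* pData = (uint8_t*)((uintptr_t)pStub & ~(OS_PAGE_SIZE - 1)) + THUNKS_MAP_SIZE;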
+ void * pNewMapping = PalVirtualAlloc(NULL, THUNKS_MAP_SIZE * 2, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READ); + if (pNewMapping == NULL) + return NULL; + + void * pThunksSection = pNewMapping; + void * pDataSection = (UInt8*)pNewMapping + THUNKS_MAP_SIZE; + + if (!PalVirtualProtect(pDataSection, THUNKS_MAP_SIZE, PAGE_READWRITE) || + !PalVirtualProtect(pThunksSection, THUNKS_MAP_SIZE, PAGE_EXECUTE_READWRITE)) + { + PalVirtualFree(pNewMapping, 0, MEM_RELEASE); + return NULL; + } + +#endif + + int numBlocksPerMap = RhpGetNumThunkBlocksPerMapping(); + int numThunksPerBlock = RhpGetNumThunksPerBlock(); + + for (int m = 0; m < numBlocksPerMap; m++) + { + UInt8* pDataBlockAddress = (UInt8*)pDataSection + m * OS_PAGE_SIZE; + UInt8* pThunkBlockAddress = (UInt8*)pThunksSection + m * OS_PAGE_SIZE; + + for (int i = 0; i < numThunksPerBlock; i++) + { + UInt8* pCurrentThunkAddress = pThunkBlockAddress + THUNK_SIZE * i; + UInt8* pCurrentDataAddress = pDataBlockAddress + i * POINTER_SIZE * 2; + +#ifdef TARGET_AMD64 + + // mov r10, + // jmp [r10 + + // jmp [eax + + // str r12,[sp,#-4] + // ldr r12,[r12, Signature == ReadyToRunHeaderConstants::Signature); + if (pReadyToRunHeader->Signature != ReadyToRunHeaderConstants::Signature) + return nullptr; + + // Only the current major version is supported currently + ASSERT(pReadyToRunHeader->MajorVersion == ReadyToRunHeaderConstants::CurrentMajorVersion); + if (pReadyToRunHeader->MajorVersion != ReadyToRunHeaderConstants::CurrentMajorVersion) + return nullptr; + + return new (nothrow) TypeManager(osModule, pReadyToRunHeader, pClasslibFunctions, nClasslibFunctions); +} + +TypeManager::TypeManager(HANDLE osModule, ReadyToRunHeader * pHeader, void** pClasslibFunctions, UInt32 nClasslibFunctions) + : m_osModule(osModule), m_pHeader(pHeader), + m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions) +{ + int length; + m_pStaticsGCDataSection = (UInt8*)GetModuleSection(ReadyToRunSectionType::GCStaticRegion, &length); + m_pStaticsGCInfo = (StaticGcDesc*)GetModuleSection(ReadyToRunSectionType::GCStaticDesc, &length); + m_pThreadStaticsDataSection = (UInt8*)GetModuleSection(ReadyToRunSectionType::ThreadStaticRegion, &length); + m_pThreadStaticsGCInfo = (StaticGcDesc*)GetModuleSection(ReadyToRunSectionType::ThreadStaticGCDescRegion, &length); + m_pTlsIndex = (UInt32*)GetModuleSection(ReadyToRunSectionType::ThreadStaticIndex, &length); + m_pLoopHijackFlag = (UInt32*)GetModuleSection(ReadyToRunSectionType::LoopHijackFlag, &length); + m_pDispatchMapTable = (DispatchMap **)GetModuleSection(ReadyToRunSectionType::InterfaceDispatchTable, &length); +} + +void * TypeManager::GetModuleSection(ReadyToRunSectionType sectionId, int * length) +{ + ModuleInfoRow * pModuleInfoRows = (ModuleInfoRow *)(m_pHeader + 1); + + ASSERT(m_pHeader->EntrySize == sizeof(ModuleInfoRow)); + + // TODO: Binary search + for (int i = 0; i < m_pHeader->NumberOfSections; i++) + { + ModuleInfoRow * pCurrent = pModuleInfoRows + i; + if ((int32_t)sectionId == pCurrent->SectionId) + { + *length = pCurrent->GetLength(); + return pCurrent->Start; + } + } + + *length = 0; + return nullptr; +} + +void * TypeManager::GetClasslibFunction(ClasslibFunctionId functionId) +{ + uint32_t id = (uint32_t)functionId; + + if (id >= m_nClasslibFunctions) + return nullptr; + + return m_pClasslibFunctions[id]; +} + +bool TypeManager::ModuleInfoRow::HasEndPointer() +{ + return Flags & (int32_t)ModuleInfoFlags::HasEndPointer; +} + +int TypeManager::ModuleInfoRow::GetLength() +{ + if (HasEndPointer()) + { 
+ return (int)((UInt8*)End - (UInt8*)Start); + } + else + { + return sizeof(void*); + } +} + +void TypeManager::EnumStaticGCRefsBlock(void * pfnCallback, void * pvCallbackData, StaticGcDesc* pStaticGcInfo) +{ + if (pStaticGcInfo == NULL) + return; + + for (UInt32 idxSeries = 0; idxSeries < pStaticGcInfo->m_numSeries; idxSeries++) + { + PTR_StaticGcDescGCSeries pSeries = dac_cast(dac_cast(pStaticGcInfo) + + offsetof(StaticGcDesc, m_series) + (idxSeries * sizeof(StaticGcDesc::GCSeries))); + + // The m_startOffset field is really 32-bit relocation (IMAGE_REL_BASED_RELPTR32) to the GC static base of the type + // the GCSeries is describing for. This makes it tolerable to the symbol sorting that the linker conducts. + PTR_RtuObjectRef pRefLocation = dac_cast(dac_cast(&pSeries->m_startOffset) + (Int32)pSeries->m_startOffset); + UInt32 numObjects = pSeries->m_size; + + RedhawkGCInterface::BulkEnumGcObjRef(pRefLocation, numObjects, pfnCallback, pvCallbackData); + } +} + +void TypeManager::EnumThreadStaticGCRefsBlock(void * pfnCallback, void * pvCallbackData, StaticGcDesc* pStaticGcInfo, UInt8* pbThreadStaticData) +{ + if (pStaticGcInfo == NULL) + return; + + for (UInt32 idxSeries = 0; idxSeries < pStaticGcInfo->m_numSeries; idxSeries++) + { + PTR_StaticGcDescGCSeries pSeries = dac_cast(dac_cast(pStaticGcInfo) + + offsetof(StaticGcDesc, m_series) + (idxSeries * sizeof(StaticGcDesc::GCSeries))); + + // The m_startOffset field is really a 32-bit relocation (IMAGE_REL_SECREL) to the TLS section. + UInt8* pTlsObject = pbThreadStaticData + pSeries->m_startOffset; + PTR_RtuObjectRef pRefLocation = dac_cast(pTlsObject); + UInt32 numObjects = pSeries->m_size; + + RedhawkGCInterface::BulkEnumGcObjRef(pRefLocation, numObjects, pfnCallback, pvCallbackData); + } +} + +void TypeManager::EnumStaticGCRefs(void * pfnCallback, void * pvCallbackData) +{ + // Regular statics. + EnumStaticGCRefsBlock(pfnCallback, pvCallbackData, m_pStaticsGCInfo); + + // Thread local statics. + if (m_pThreadStaticsGCInfo != NULL) + { + FOREACH_THREAD(pThread) + { + // To calculate the address of the data for each thread's TLS fields we need two values: + // 1) The TLS slot index allocated for this module by the OS loader. We keep a pointer to this + // value in the module header. + // 2) The offset into the TLS block at which managed data begins. + EnumThreadStaticGCRefsBlock(pfnCallback, pvCallbackData, m_pThreadStaticsGCInfo, + dac_cast(pThread->GetThreadLocalStorage(*m_pTlsIndex, 0))); + } + END_FOREACH_THREAD + } +} + +HANDLE TypeManager::GetOsModuleHandle() +{ + return m_osModule; +} + +TypeManager* TypeManagerHandle::AsTypeManager() +{ + return (TypeManager*)_value; +} diff --git a/src/coreclr/src/nativeaot/Runtime/TypeManager.h b/src/coreclr/src/nativeaot/Runtime/TypeManager.h new file mode 100644 index 0000000000000..193a6cb3707ae --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/TypeManager.h @@ -0,0 +1,76 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+#pragma once +#include "ModuleHeaders.h" +#include "ICodeManager.h" + +struct StaticGcDesc; +class DispatchMap; +typedef unsigned char UInt8; + +class TypeManager +{ + // NOTE: Part of this layout is a contract with the managed side in TypeManagerHandle.cs + HANDLE m_osModule; + ReadyToRunHeader * m_pHeader; + DispatchMap** m_pDispatchMapTable; + StaticGcDesc* m_pStaticsGCInfo; + StaticGcDesc* m_pThreadStaticsGCInfo; + UInt8* m_pStaticsGCDataSection; + UInt8* m_pThreadStaticsDataSection; + UInt32* m_pTlsIndex; // Pointer to TLS index if this module uses thread statics + void** m_pClasslibFunctions; + UInt32 m_nClasslibFunctions; + UInt32* m_pLoopHijackFlag; + + TypeManager(HANDLE osModule, ReadyToRunHeader * pHeader, void** pClasslibFunctions, UInt32 nClasslibFunctions); + +public: + static TypeManager * Create(HANDLE osModule, void * pModuleHeader, void** pClasslibFunctions, UInt32 nClasslibFunctions); + void * GetModuleSection(ReadyToRunSectionType sectionId, int * length); + void EnumStaticGCRefs(void * pfnCallback, void * pvCallbackData); + HANDLE GetOsModuleHandle(); + void* GetClasslibFunction(ClasslibFunctionId functionId); + UInt32* GetPointerToTlsIndex() { return m_pTlsIndex; } + void SetLoopHijackFlag(UInt32 flag) { if (m_pLoopHijackFlag != nullptr) *m_pLoopHijackFlag = flag; } + +private: + + struct ModuleInfoRow + { + int32_t SectionId; + int32_t Flags; + void * Start; + void * End; + + bool HasEndPointer(); + int GetLength(); + }; + + void EnumStaticGCRefsBlock(void * pfnCallback, void * pvCallbackData, StaticGcDesc* pStaticGcInfo); + void EnumThreadStaticGCRefsBlock(void * pfnCallback, void * pvCallbackData, StaticGcDesc* pStaticGcInfo, UInt8* pbThreadStaticData); +}; + +// TypeManagerHandle represents an AOT module in MRT based runtimes. +// These handles are a pointer to a TypeManager. +struct TypeManagerHandle +{ + static TypeManagerHandle Null() + { + TypeManagerHandle handle; + handle._value = nullptr; + return handle; + } + + static TypeManagerHandle Create(TypeManager * value) + { + TypeManagerHandle handle; + handle._value = value; + return handle; + } + + void *_value; + + TypeManager* AsTypeManager(); +}; + diff --git a/src/coreclr/src/nativeaot/Runtime/UniversalTransitionHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/UniversalTransitionHelpers.cpp new file mode 100644 index 0000000000000..377c5f8998582 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/UniversalTransitionHelpers.cpp @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + +// +// Define tables of predictable distinguished values that RhpUniversalTransition can use to +// trash argument registers after they have been saved into the transition frame. +// +// Trashing these registers is a testability aid that makes it easier to detect bugs where +// the transition frame content is not correctly propagated to the eventual callee. +// +// In the absence of trashing, such bugs can become undetectable if the code that +// dispatches the call happens to never touch the impacted argument register (e.g., xmm3 on +// amd64 or d5 on arm32). 
In such a case, the original enregistered argument will flow +// unmodified into the eventual callee, obscuring the fact that the dispatcher failed to +// propagate the transition frame copy of this register. +// +// These tables are manually aligned as a conservative safeguard to ensure that the +// consumers can use arbitrary access widths without ever needing to worry about alignment. +// The comments in each table show the %d/%f renderings of each 32-bit value, plus the +// %I64d/%f rendering of the combined 64-bit value of each aligned pair of 32-bit values. +// + +#define TRASH_VALUE_ALIGNMENT 16 + +EXTERN_C +DECLSPEC_ALIGN(TRASH_VALUE_ALIGNMENT) +const UInt32 RhpIntegerTrashValues[] = { + // Lo32 Hi32 Lo32 Hi32 Hi32:Lo32 + // ----------- ----------- --------- --------- ------------------ + 0x07801001U, 0x07802002U, // (125833217, 125837314) (540467148372316161) + 0x07803003U, 0x07804004U, // (125841411, 125845508) (540502341334347779) + 0x07805005U, 0x07806006U, // (125849605, 125853702) (540537534296379397) + 0x07807007U, 0x07808008U, // (125857799, 125861896) (540572727258411015) + 0x07809009U, 0x0780a00aU, // (125865993, 125870090) (540607920220442633) + 0x0780b00bU, 0x0780c00cU, // (125874187, 125878284) (540643113182474251) + 0x0780d00dU, 0x0780e00eU, // (125882381, 125886478) (540678306144505869) + 0x0780f00fU, 0x07810010U, // (125890575, 125894672) (540713499106537487) +}; + +EXTERN_C +DECLSPEC_ALIGN(TRASH_VALUE_ALIGNMENT) +const UInt32 RhpFpTrashValues[] = { + // Lo32 Hi32 Lo32 Hi32 Hi32:Lo32 + // ----------- ----------- ------------------- ------------------- ------------------- + 0x42001001U, 0x42002002U, // (32.0156288146972660, 32.0312576293945310) (8657061952.00781440) + 0x42003003U, 0x42004004U, // (32.0468864440917970, 32.0625152587890630) (8724187200.02344320) + 0x42005005U, 0x42006006U, // (32.0781440734863280, 32.0937728881835940) (8791312448.03907200) + 0x42007007U, 0x42008008U, // (32.1094017028808590, 32.1250305175781250) (8858437696.05470090) + 0x42009009U, 0x4200a00aU, // (32.1406593322753910, 32.1562881469726560) (8925562944.07032970) + 0x4200b00bU, 0x4200c00cU, // (32.1719169616699220, 32.1875457763671880) (8992688192.08595850) + 0x4200d00dU, 0x4200e00eU, // (32.2031745910644530, 32.2188034057617190) (9059813440.10158730) + 0x4200f00fU, 0x42010010U, // (32.2344322204589840, 32.2500610351562500) (9126938688.11721610) +}; + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + diff --git a/src/coreclr/src/nativeaot/Runtime/allocheap.cpp b/src/coreclr/src/nativeaot/Runtime/allocheap.cpp new file mode 100644 index 0000000000000..c5d1e9ede3fea --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/allocheap.cpp @@ -0,0 +1,372 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "DebugMacrosExt.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "Range.h" +#ifdef FEATURE_RWX_MEMORY +#include "memaccessmgr.h" +#endif +#include "allocheap.h" + +#include "CommonMacros.inl" +#include "slist.inl" + +using namespace rh::util; + +//------------------------------------------------------------------------------------------------- +AllocHeap::AllocHeap() + : m_blockList(), + m_rwProtectType(PAGE_READWRITE), + m_roProtectType(PAGE_READWRITE), +#ifdef FEATURE_RWX_MEMORY + m_pAccessMgr(NULL), + m_hCurPageRW(), +#endif // FEATURE_RWX_MEMORY + m_pNextFree(NULL), + m_pFreeCommitEnd(NULL), + m_pFreeReserveEnd(NULL), + m_pbInitialMem(NULL), + m_fShouldFreeInitialMem(false), + m_lock(CrstAllocHeap) + COMMA_INDEBUG(m_fIsInit(false)) +{ + ASSERT(!_UseAccessManager()); +} + +#ifdef FEATURE_RWX_MEMORY +//------------------------------------------------------------------------------------------------- +AllocHeap::AllocHeap( + UInt32 rwProtectType, + UInt32 roProtectType, + MemAccessMgr* pAccessMgr) + : m_blockList(), + m_rwProtectType(rwProtectType), + m_roProtectType(roProtectType == 0 ? rwProtectType : roProtectType), + m_pAccessMgr(pAccessMgr), + m_hCurPageRW(), + m_pNextFree(NULL), + m_pFreeCommitEnd(NULL), + m_pFreeReserveEnd(NULL), + m_pbInitialMem(NULL), + m_fShouldFreeInitialMem(false), + m_lock(CrstAllocHeap) + COMMA_INDEBUG(m_fIsInit(false)) +{ + ASSERT(!_UseAccessManager() || (m_rwProtectType != m_roProtectType && m_pAccessMgr != NULL)); +} +#endif // FEATURE_RWX_MEMORY + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::Init() +{ + ASSERT(!m_fIsInit); + INDEBUG(m_fIsInit = true;) + + return true; +} + +//------------------------------------------------------------------------------------------------- +// This is for using pre-allocated memory on heap construction. +// Should never use this more than once, and should always follow construction of heap. 
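+// Illustrative usage sketch (hypothetical names; assumes the caller has already reserved
+// cbReserve bytes starting at pbMem and committed the first cbCommit of them):
+//
+//     AllocHeap* pHeap = new (nothrow) AllocHeap();
+//     if (pHeap == NULL || !pHeap->Init(pbMem, cbCommit, cbReserve, false /* fShouldFreeInitialMem */))
+//         return false;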
+ +bool AllocHeap::Init( + UInt8 * pbInitialMem, + UIntNative cbInitialMemCommit, + UIntNative cbInitialMemReserve, + bool fShouldFreeInitialMem) +{ + ASSERT(!m_fIsInit); + +#ifdef FEATURE_RWX_MEMORY + // Manage the committed portion of memory + if (_UseAccessManager()) + { + m_pAccessMgr->ManageMemoryRange(MemRange(pbInitialMem, cbInitialMemCommit), true); + } +#endif // FEATURE_RWX_MEMORY + + BlockListElem *pBlock = new (nothrow) BlockListElem(pbInitialMem, cbInitialMemReserve); + if (pBlock == NULL) + return false; + m_blockList.PushHead(pBlock); + + if (!_UpdateMemPtrs(pbInitialMem, + pbInitialMem + cbInitialMemCommit, + pbInitialMem + cbInitialMemReserve)) + { + return false; + } + + m_pbInitialMem = pbInitialMem; + m_fShouldFreeInitialMem = fShouldFreeInitialMem; + + INDEBUG(m_fIsInit = true;) + return true; +} + +//------------------------------------------------------------------------------------------------- +AllocHeap::~AllocHeap() +{ + while (!m_blockList.IsEmpty()) + { + BlockListElem *pCur = m_blockList.PopHead(); + if (pCur->GetStart() != m_pbInitialMem || m_fShouldFreeInitialMem) + PalVirtualFree(pCur->GetStart(), pCur->GetLength(), MEM_RELEASE); + delete pCur; + } +} + +//------------------------------------------------------------------------------------------------- +UInt8 * AllocHeap::_Alloc( + UIntNative cbMem, + UIntNative alignment + WRITE_ACCESS_HOLDER_ARG + ) +{ +#ifndef FEATURE_RWX_MEMORY + const void* pRWAccessHolder = NULL; +#endif // FEATURE_RWX_MEMORY + + ASSERT((alignment & (alignment - 1)) == 0); // Power of 2 only. + ASSERT(alignment <= OS_PAGE_SIZE); // Can't handle this right now. + ASSERT((m_rwProtectType == m_roProtectType) == (pRWAccessHolder == NULL)); + ASSERT(!_UseAccessManager() || pRWAccessHolder != NULL); + + if (_UseAccessManager() && pRWAccessHolder == NULL) + return NULL; + + CrstHolder lock(&m_lock); + + UInt8 * pbMem = _AllocFromCurBlock(cbMem, alignment PASS_WRITE_ACCESS_HOLDER_ARG); + if (pbMem != NULL) + return pbMem; + + // Must allocate new block + if (!_AllocNewBlock(cbMem)) + return NULL; + + pbMem = _AllocFromCurBlock(cbMem, alignment PASS_WRITE_ACCESS_HOLDER_ARG); + ASSERT_MSG(pbMem != NULL, "AllocHeap::Alloc: failed to alloc mem after new block alloc"); + + return pbMem; +} + +//------------------------------------------------------------------------------------------------- +UInt8 * AllocHeap::Alloc( + UIntNative cbMem + WRITE_ACCESS_HOLDER_ARG) +{ + return _Alloc(cbMem, 1 PASS_WRITE_ACCESS_HOLDER_ARG); +} + +//------------------------------------------------------------------------------------------------- +UInt8 * AllocHeap::AllocAligned( + UIntNative cbMem, + UIntNative alignment + WRITE_ACCESS_HOLDER_ARG) +{ + return _Alloc(cbMem, alignment PASS_WRITE_ACCESS_HOLDER_ARG); +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::Contains(void* pvMem, UIntNative cbMem) +{ + MemRange range(pvMem, cbMem); + for (BlockList::Iterator it = m_blockList.Begin(); it != m_blockList.End(); ++it) + { + if (it->Contains(range)) + { + return true; + } + } + return false; +} + +#ifdef FEATURE_RWX_MEMORY +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_AcquireWriteAccess( + UInt8* pvMem, + UIntNative cbMem, + WriteAccessHolder* pHolder) +{ + ASSERT(!_UseAccessManager() || m_pAccessMgr != NULL); + + if (_UseAccessManager()) + return m_pAccessMgr->AcquireWriteAccess(MemRange(pvMem, cbMem), m_hCurPageRW, pHolder); 
+ else + return true; +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::AcquireWriteAccess( + void* pvMem, + UIntNative cbMem, + WriteAccessHolder* pHolder) +{ + return _AcquireWriteAccess(static_cast(pvMem), cbMem, pHolder); +} +#endif // FEATURE_RWX_MEMORY + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_UpdateMemPtrs(UInt8* pNextFree, UInt8* pFreeCommitEnd, UInt8* pFreeReserveEnd) +{ + ASSERT(MemRange(pNextFree, pFreeReserveEnd).Contains(MemRange(pNextFree, pFreeCommitEnd))); + ASSERT(ALIGN_DOWN(pFreeCommitEnd, OS_PAGE_SIZE) == pFreeCommitEnd); + ASSERT(ALIGN_DOWN(pFreeReserveEnd, OS_PAGE_SIZE) == pFreeReserveEnd); + +#ifdef FEATURE_RWX_MEMORY + // See if we need to update current allocation holder or protect committed pages. + if (_UseAccessManager()) + { + if (pFreeCommitEnd - pNextFree > 0) + { +#ifndef STRESS_MEMACCESSMGR + // Create or update the alloc cache, used to speed up new allocations. + // If there is available commited memory and either m_pNextFree is + // being updated past a page boundary or the current cache is empty, + // then update the cache. + if (ALIGN_DOWN(m_pNextFree, OS_PAGE_SIZE) != ALIGN_DOWN(pNextFree, OS_PAGE_SIZE) || + m_hCurPageRW.GetRange().GetLength() == 0) + { + // Update current alloc page write access holder. + if (!_AcquireWriteAccess(ALIGN_DOWN(pNextFree, OS_PAGE_SIZE), + OS_PAGE_SIZE, + &m_hCurPageRW)) + { + return false; + } + } +#endif // STRESS_MEMACCESSMGR + + } + else + { // No available committed memory. Release the cache. + m_hCurPageRW.Release(); + } + } +#endif // FEATURE_RWX_MEMORY + + m_pNextFree = pNextFree; + m_pFreeCommitEnd = pFreeCommitEnd; + m_pFreeReserveEnd = pFreeReserveEnd; + return true; +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_UpdateMemPtrs(UInt8* pNextFree, UInt8* pFreeCommitEnd) +{ + return _UpdateMemPtrs(pNextFree, pFreeCommitEnd, m_pFreeReserveEnd); +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_UpdateMemPtrs(UInt8* pNextFree) +{ + return _UpdateMemPtrs(pNextFree, m_pFreeCommitEnd); +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_AllocNewBlock(UIntNative cbMem) +{ + cbMem = ALIGN_UP(max(cbMem, s_minBlockSize), OS_PAGE_SIZE);; + + UInt8 * pbMem = reinterpret_cast + (PalVirtualAlloc(NULL, cbMem, MEM_COMMIT, m_roProtectType)); + + if (pbMem == NULL) + return false; + + BlockListElem *pBlockListElem = new (nothrow) BlockListElem(pbMem, cbMem); + if (pBlockListElem == NULL) + { + PalVirtualFree(pbMem, 0, MEM_RELEASE); + return false; + } + + // Add to the list. While there is no race for writers (we hold the lock) we have the + // possibility of simultaneous readers, and using the interlocked version creates a + // memory barrier to make sure any reader sees a consistent list. 
+ m_blockList.PushHeadInterlocked(pBlockListElem); + + return _UpdateMemPtrs(pbMem, pbMem + cbMem, pbMem + cbMem); +} + +//------------------------------------------------------------------------------------------------- +UInt8 * AllocHeap::_AllocFromCurBlock( + UIntNative cbMem, + UIntNative alignment + WRITE_ACCESS_HOLDER_ARG) +{ + UInt8 * pbMem = NULL; + + cbMem += (UInt8 *)ALIGN_UP(m_pNextFree, alignment) - m_pNextFree; + + if (m_pNextFree + cbMem <= m_pFreeCommitEnd || + _CommitFromCurBlock(cbMem)) + { + ASSERT(cbMem + m_pNextFree <= m_pFreeCommitEnd); +#ifdef FEATURE_RWX_MEMORY + if (pRWAccessHolder != NULL) + { + if (!_AcquireWriteAccess(m_pNextFree, cbMem, pRWAccessHolder)) + return NULL; + } +#endif // FEATURE_RWX_MEMORY + pbMem = ALIGN_UP(m_pNextFree, alignment); + + if (!_UpdateMemPtrs(m_pNextFree + cbMem)) + return NULL; + } + + return pbMem; +} + +//------------------------------------------------------------------------------------------------- +bool AllocHeap::_CommitFromCurBlock(UIntNative cbMem) +{ + ASSERT(m_pFreeCommitEnd < m_pNextFree + cbMem); + + if (m_pNextFree + cbMem <= m_pFreeReserveEnd) + { + UIntNative cbMemToCommit = ALIGN_UP(cbMem, OS_PAGE_SIZE); + +#ifdef FEATURE_RWX_MEMORY + if (_UseAccessManager()) + { + if (!m_pAccessMgr->ManageMemoryRange(MemRange(m_pFreeCommitEnd, cbMemToCommit), false)) + return false; + } + else + { + UInt32 oldProtectType; + if (!PalVirtualProtect(m_pFreeCommitEnd, cbMemToCommit, m_roProtectType, &oldProtectType)) + return false; + } +#endif // FEATURE_RWX_MEMORY + + return _UpdateMemPtrs(m_pNextFree, m_pFreeCommitEnd + cbMemToCommit); + } + + return false; +} + +//------------------------------------------------------------------------------------------------- +void * __cdecl operator new(size_t n, AllocHeap * alloc) +{ + return alloc->Alloc(n); +} + +//------------------------------------------------------------------------------------------------- +void * __cdecl operator new[](size_t n, AllocHeap * alloc) +{ + return alloc->Alloc(n); +} + diff --git a/src/coreclr/src/nativeaot/Runtime/allocheap.h b/src/coreclr/src/nativeaot/Runtime/allocheap.h new file mode 100644 index 0000000000000..171d63109b557 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/allocheap.h @@ -0,0 +1,122 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "forward_declarations.h" + +#ifdef FEATURE_RWX_MEMORY +#define WRITE_ACCESS_HOLDER_ARG , rh::util::WriteAccessHolder *pRWAccessHolder +#define WRITE_ACCESS_HOLDER_ARG_NULL_DEFAULT , rh::util::WriteAccessHolder *pRWAccessHolder = NULL +#define PASS_WRITE_ACCESS_HOLDER_ARG , pRWAccessHolder +#else // FEATURE_RWX_MEMORY +#define WRITE_ACCESS_HOLDER_ARG +#define WRITE_ACCESS_HOLDER_ARG_NULL_DEFAULT +#define PASS_WRITE_ACCESS_HOLDER_ARG +#endif // FEATURE_RWX_MEMORY + +class AllocHeap +{ + public: + AllocHeap(); + +#ifdef FEATURE_RWX_MEMORY + // If pAccessMgr is non-NULL, it will be used to manage R/W access to the memory allocated. + AllocHeap(UInt32 rwProtectType = PAGE_READWRITE, + UInt32 roProtectType = 0, // 0 indicates "same as rwProtectType" + rh::util::MemAccessMgr* pAccessMgr = NULL); +#endif // FEATURE_RWX_MEMORY + + bool Init(); + + bool Init(UInt8 * pbInitialMem, + UIntNative cbInitialMemCommit, + UIntNative cbInitialMemReserve, + bool fShouldFreeInitialMem); + + ~AllocHeap(); + + // If AllocHeap was created with a MemAccessMgr, pRWAccessHolder must be non-NULL. 
+ // On return, the holder will permit R/W access to the allocated memory until it + // is destructed. + UInt8 * Alloc(UIntNative cbMem WRITE_ACCESS_HOLDER_ARG_NULL_DEFAULT); + + // If AllocHeap was created with a MemAccessMgr, pRWAccessHolder must be non-NULL. + // On return, the holder will permit R/W access to the allocated memory until it + // is destructed. + UInt8 * AllocAligned(UIntNative cbMem, + UIntNative alignment + WRITE_ACCESS_HOLDER_ARG_NULL_DEFAULT); + + // Returns true if this AllocHeap owns the memory range [pvMem, pvMem+cbMem) + bool Contains(void * pvMem, + UIntNative cbMem); + +#ifdef FEATURE_RWX_MEMORY + // Used with previously-allocated memory for which RW access is needed again. + // Returns true on success. R/W access will be granted until the holder is + // destructed. + bool AcquireWriteAccess(void* pvMem, + UIntNative cbMem, + rh::util::WriteAccessHolder* pHolder); +#endif // FEATURE_RWX_MEMORY + + private: + // Allocation Helpers + UInt8* _Alloc(UIntNative cbMem, UIntNative alignment WRITE_ACCESS_HOLDER_ARG); + bool _AllocNewBlock(UIntNative cbMem); + UInt8* _AllocFromCurBlock(UIntNative cbMem, UIntNative alignment WRITE_ACCESS_HOLDER_ARG); + bool _CommitFromCurBlock(UIntNative cbMem); + + // Access protection helpers +#ifdef FEATURE_RWX_MEMORY + bool _AcquireWriteAccess(UInt8* pvMem, UIntNative cbMem, rh::util::WriteAccessHolder* pHolder); +#endif // FEATURE_RWX_MEMORY + bool _UpdateMemPtrs(UInt8* pNextFree, UInt8* pFreeCommitEnd, UInt8* pFreeReserveEnd); + bool _UpdateMemPtrs(UInt8* pNextFree, UInt8* pFreeCommitEnd); + bool _UpdateMemPtrs(UInt8* pNextFree); + bool _UseAccessManager() { return m_rwProtectType != m_roProtectType; } + + static const UIntNative s_minBlockSize = OS_PAGE_SIZE; + + typedef rh::util::MemRange Block; + typedef DPTR(Block) PTR_Block; + struct BlockListElem : public Block + { + BlockListElem(Block const & block) + : Block(block) + {} + + BlockListElem(UInt8 * pbMem, UIntNative cbMem) + : Block(pbMem, cbMem) + {} + + Block m_block; + PTR_Block m_pNext; + }; + + typedef SList BlockList; + BlockList m_blockList; + + UInt32 m_rwProtectType; // READ/WRITE/EXECUTE/etc + UInt32 m_roProtectType; // What to do with fully allocated and initialized pages. + +#ifdef FEATURE_RWX_MEMORY + rh::util::MemAccessMgr* m_pAccessMgr; + rh::util::WriteAccessHolder m_hCurPageRW; // Used to hold RW access to the current allocation page + // Passed as pHint to MemAccessMgr::AcquireWriteAccess. +#endif // FEATURE_RWX_MEMORY + UInt8 * m_pNextFree; + UInt8 * m_pFreeCommitEnd; + UInt8 * m_pFreeReserveEnd; + + UInt8 * m_pbInitialMem; + bool m_fShouldFreeInitialMem; + + Crst m_lock; + + INDEBUG(bool m_fIsInit;) +}; +typedef DPTR(AllocHeap) PTR_AllocHeap; + +//------------------------------------------------------------------------------------------------- +void * __cdecl operator new(size_t n, AllocHeap * alloc); +void * __cdecl operator new[](size_t n, AllocHeap * alloc); + diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.S b/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.S new file mode 100644 index 0000000000000..c1098e78245d1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.S @@ -0,0 +1,343 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +// Allocate non-array, non-finalizable object. 
If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// RDI == EEType +NESTED_ENTRY RhpNewFast, _TEXT, NoHandler + push_nonvol_reg rbx + mov rbx, rdi + + // rax = GetThread() + INLINE_GETTHREAD + + // + // rbx contains EEType pointer + // + mov edx, [rbx + OFFSETOF__EEType__m_uBaseSize] + + // + // rax: Thread pointer + // rbx: EEType pointer + // rdx: base size + // + + mov rsi, [rax + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + add rdx, rsi + cmp rdx, [rax + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja LOCAL_LABEL(RhpNewFast_RarePath) + + // set the new alloc pointer + mov [rax + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rdx + + mov rax, rsi + + // set the new object's EEType pointer + mov [rsi], rbx + + .cfi_remember_state + pop_nonvol_reg rbx + ret + + .cfi_restore_state + .cfi_def_cfa_offset 16 // workaround cfi_restore_state bug +LOCAL_LABEL(RhpNewFast_RarePath): + mov rdi, rbx // restore EEType + xor esi, esi + pop_nonvol_reg rbx + jmp C_FUNC(RhpNewObject) + +NESTED_END RhpNewFast, _TEXT + + + +// Allocate non-array object with finalizer +// RDI == EEType +LEAF_ENTRY RhpNewFinalizable, _TEXT + mov esi, GC_ALLOC_FINALIZE + jmp C_FUNC(RhpNewObject) +LEAF_END RhpNewFinalizable, _TEXT + + + +// Allocate non-array object +// RDI == EEType +// ESI == alloc flags +NESTED_ENTRY RhpNewObject, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME rcx + END_PROLOGUE + + // RCX: transition frame + + // Preserve the EEType in RBX + mov rbx, rdi + + mov edx, [rdi + OFFSETOF__EEType__m_uBaseSize] // cbSize + + // Call the rest of the allocation helper. + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call C_FUNC(RhpGcAlloc) + + // Set the new object's EEType pointer on success. + test rax, rax + jz LOCAL_LABEL(NewOutOfMemory) + mov [rax + OFFSETOF__Object__m_pEEType], rbx + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + mov esi, [rbx + OFFSETOF__EEType__m_uBaseSize] +.att_syntax + cmp $RH_LARGE_OBJECT_SIZE, %rsi +.intel_syntax noprefix + jb LOCAL_LABEL(New_SkipPublish) + mov rdi, rax // rdi: object + // rsi: already contains object size + call C_FUNC(RhpPublishObject) // rax: this function returns the object that was passed-in +LOCAL_LABEL(New_SkipPublish): + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + ret + + .cfi_restore_state + .cfi_def_cfa_offset 96 // workaround cfi_restore_state bug +LOCAL_LABEL(NewOutOfMemory): + // This is the OOM failure path. We're going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov rdi, rbx // EEType pointer + xor esi, esi // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + + jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) +NESTED_END RhpNewObject, _TEXT + + +// Allocate a string. +// RDI == EEType +// ESI == character/element count +NESTED_ENTRY RhNewString, _TEXT, NoHandler + // we want to limit the element count to the non-negative 32-bit int range + cmp rsi, MAX_STRING_LENGTH + ja LOCAL_LABEL(StringSizeOverflow) + + push_nonvol_reg rbx + push_nonvol_reg r12 + push_register rcx // padding + + mov rbx, rdi // save EEType + mov r12, rsi // save element count + + // rax = GetThread() + INLINE_GETTHREAD + + mov rcx, rax // rcx = Thread* + + // Compute overall allocation size (align(base size + (element size * elements), 8)). 
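+    // In C terms (STRING_BASE_SIZE and STRING_COMPONENT_SIZE come from the generated
+    // AsmOffsets headers included above), the two instructions below compute:
+    //
+    //     size_t cb = ((size_t)count * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7) & ~(size_t)7;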
+ lea rax, [r12 * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7] + and rax, -8 + + // rax == string size + // rbx == EEType + // rcx == Thread* + // r12 == element count + + mov rdx, rax + add rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc LOCAL_LABEL(RhNewString_RarePath) + + // rax == new alloc ptr + // rbx == EEType + // rcx == Thread* + // rdx == string size + // r12 == element count + cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja LOCAL_LABEL(RhNewString_RarePath) + + mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax + + // calc the new object pointer + sub rax, rdx + + mov [rax + OFFSETOF__Object__m_pEEType], rbx + mov [rax + OFFSETOF__String__m_Length], r12d + + .cfi_remember_state + pop_register rcx // padding + pop_nonvol_reg r12 + pop_nonvol_reg rbx + ret + + .cfi_restore_state + .cfi_def_cfa_offset 32 // workaround cfi_restore_state bug +LOCAL_LABEL(RhNewString_RarePath): + mov rdi, rbx // restore EEType + mov rsi, r12 // restore element count + // passing string size in rdx + + pop_register rcx // padding + pop_nonvol_reg r12 + pop_nonvol_reg rbx + jmp C_FUNC(RhpNewArrayRare) + +LOCAL_LABEL(StringSizeOverflow): + // We get here if the size of the final string object can't be represented as an unsigned + // 32-bit value. We're going to tail-call to a managed helper that will throw + // an OOM exception that the caller of this allocator understands. + + // rdi holds EEType pointer already + xor esi, esi // Indicate that we should throw OOM. + jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhNewString, _TEXT + + +// Allocate one dimensional, zero based array (SZARRAY). +// RDI == EEType +// ESI == element count +NESTED_ENTRY RhpNewArray, _TEXT, NoHandler + // we want to limit the element count to the non-negative 32-bit int range + cmp rsi, 0x07fffffff + ja LOCAL_LABEL(ArraySizeOverflow) + + push_nonvol_reg rbx + push_nonvol_reg r12 + push_register rcx // padding + + mov rbx, rdi // save EEType + mov r12, rsi // save element count + + // rax = GetThread() + INLINE_GETTHREAD + + mov rcx, rax // rcx = Thread* + + // Compute overall allocation size (align(base size + (element size * elements), 8)). 
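+    // In C terms (offsets from the generated AsmOffsets headers), the sequence below computes:
+    //
+    //     size_t cb = ((size_t)count * pEEType->m_usComponentSize + pEEType->m_uBaseSize + 7) & ~(size_t)7;
+    //
+    // using a widening mul on the 16-bit component size.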
+ movzx eax, word ptr [rbx + OFFSETOF__EEType__m_usComponentSize] + mul r12 + mov edx, [rbx + OFFSETOF__EEType__m_uBaseSize] + add rax, rdx + add rax, 7 + and rax, -8 + + // rax == array size + // rbx == EEType + // rcx == Thread* + // r12 == element count + + mov rdx, rax + add rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc LOCAL_LABEL(RhpNewArray_RarePath) + + // rax == new alloc ptr + // rbx == EEType + // rcx == Thread* + // rdx == array size + // r12 == element count + cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja LOCAL_LABEL(RhpNewArray_RarePath) + + mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax + + // calc the new object pointer + sub rax, rdx + + mov [rax + OFFSETOF__Object__m_pEEType], rbx + mov [rax + OFFSETOF__Array__m_Length], r12d + + .cfi_remember_state + pop_register rcx // padding + pop_nonvol_reg r12 + pop_nonvol_reg rbx + ret + + .cfi_restore_state + .cfi_def_cfa_offset 32 // workaround cfi_restore_state bug +LOCAL_LABEL(RhpNewArray_RarePath): + mov rdi, rbx // restore EEType + mov rsi, r12 // restore element count + // passing array size in rdx + + pop_register rcx // padding + pop_nonvol_reg r12 + pop_nonvol_reg rbx + jmp C_FUNC(RhpNewArrayRare) + +LOCAL_LABEL(ArraySizeOverflow): + // We get here if the size of the final array object can't be represented as an unsigned + // 32-bit value. We're going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // rdi holds EEType pointer already + mov esi, 1 // Indicate that we should throw OverflowException + jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhpNewArray, _TEXT + +NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler + + // rdi == EEType + // rsi == element count + // rdx == array size + + PUSH_COOP_PINVOKE_FRAME rcx + END_PROLOGUE + + // rcx: transition frame + + // Preserve the EEType in RBX + mov rbx, rdi + // Preserve the element count in R12 + mov r12, rsi + // Preserve the size in R13 + mov r13, rdx + + // passing EEType in rdi + xor rsi, rsi // uFlags + // pasing size in rdx + // pasing pTransitionFrame in rcx + + // Call the rest of the allocation helper. + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call C_FUNC(RhpGcAlloc) + + // Set the new object's EEType pointer and length on success. + test rax, rax + jz LOCAL_LABEL(ArrayOutOfMemory) + mov [rax + OFFSETOF__Object__m_pEEType], rbx + mov [rax + OFFSETOF__Array__m_Length], r12d + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC +.att_syntax + cmp $RH_LARGE_OBJECT_SIZE, %r13 +.intel_syntax noprefix + jb LOCAL_LABEL(NewArray_SkipPublish) + mov rdi, rax // rcx: object + mov rsi, r13 // rdx: object size + call C_FUNC(RhpPublishObject) // rax: this function returns the object that was passed-in +LOCAL_LABEL(NewArray_SkipPublish): + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + ret + + .cfi_restore_state + .cfi_def_cfa_offset 96 // workaround cfi_restore_state bug +LOCAL_LABEL(ArrayOutOfMemory): + // This is the OOM failure path. We're going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov rdi, rbx // EEType pointer + xor esi, esi // Indicate that we should throw OOM. 
+ + POP_COOP_PINVOKE_FRAME + + jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhpNewArrayRare, _TEXT + diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.asm b/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.asm new file mode 100644 index 0000000000000..bd73a8ffe711f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/AllocFast.asm @@ -0,0 +1,274 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include asmmacros.inc + + +;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +;; allocation context then automatically fallback to the slow allocation path. +;; RCX == EEType +LEAF_ENTRY RhpNewFast, _TEXT + + ;; rdx = GetThread(), TRASHES rax + INLINE_GETTHREAD rdx, rax + + ;; + ;; rcx contains EEType pointer + ;; + mov r8d, [rcx + OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; eax: base size + ;; rcx: EEType pointer + ;; rdx: Thread pointer + ;; + + mov rax, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + add r8, rax + cmp r8, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja RhpNewFast_RarePath + + ;; set the new alloc pointer + mov [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], r8 + + ;; set the new object's EEType pointer + mov [rax], rcx + ret + +RhpNewFast_RarePath: + xor edx, edx + jmp RhpNewObject + +LEAF_END RhpNewFast, _TEXT + + + +;; Allocate non-array object with finalizer +;; RCX == EEType +LEAF_ENTRY RhpNewFinalizable, _TEXT + mov edx, GC_ALLOC_FINALIZE + jmp RhpNewObject +LEAF_END RhpNewFinalizable, _TEXT + + + +;; Allocate non-array object +;; RCX == EEType +;; EDX == alloc flags +NESTED_ENTRY RhpNewObject, _TEXT + + PUSH_COOP_PINVOKE_FRAME r9 + END_PROLOGUE + + ; R9: transition frame + + ;; Preserve the EEType in RSI + mov rsi, rcx + + mov r8d, [rsi + OFFSETOF__EEType__m_uBaseSize] ; cbSize + + ;; Call the rest of the allocation helper. + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ;; Set the new object's EEType pointer on success. + test rax, rax + jz NewOutOfMemory + mov [rax + OFFSETOF__Object__m_pEEType], rsi + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + mov edx, [rsi + OFFSETOF__EEType__m_uBaseSize] + cmp rdx, RH_LARGE_OBJECT_SIZE + jb New_SkipPublish + mov rcx, rax ;; rcx: object + ;; rdx: already contains object size + call RhpPublishObject ;; rax: this function returns the object that was passed-in +New_SkipPublish: + + POP_COOP_PINVOKE_FRAME + ret + +NewOutOfMemory: + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov rcx, rsi ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + + jmp RhExceptionHandling_FailedAllocation +NESTED_END RhpNewObject, _TEXT + + +;; Allocate a string. +;; RCX == EEType +;; EDX == character/element count +LEAF_ENTRY RhNewString, _TEXT + + ; we want to limit the element count to the non-negative 32-bit int range + cmp rdx, MAX_STRING_LENGTH + ja StringSizeOverflow + + ; Compute overall allocation size (align(base size + (element size * elements), 8)). 
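+    ; In C terms, the fast path that follows is roughly:
+    ;
+    ;     size_t   cb      = ((size_t)count * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7) & ~(size_t)7;
+    ;     uint8_t* pNewTop = alloc_ptr + cb;
+    ;     if (overflowed || pNewTop > alloc_limit) goto RhpNewArrayRare;   ; slow path allocates via RhpGcAlloc
+    ;     alloc_ptr = pNewTop;                                             ; bump the thread-local allocation pointer
+    ;     object    = pNewTop - cb;                                        ; object starts at the old alloc_ptr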
+ lea rax, [(rdx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 7)] + and rax, -8 + + ; rax == string size + ; rcx == EEType + ; rdx == element count + + INLINE_GETTHREAD r10, r8 + + mov r8, rax + add rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc RhpNewArrayRare + + ; rax == new alloc ptr + ; rcx == EEType + ; rdx == element count + ; r8 == array size + ; r10 == thread + cmp rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja RhpNewArrayRare + + mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax + + ; calc the new object pointer + sub rax, r8 + + mov [rax + OFFSETOF__Object__m_pEEType], rcx + mov [rax + OFFSETOF__String__m_Length], edx + + ret + +StringSizeOverflow: + ; We get here if the size of the final string object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an OOM exception that the caller of this allocator understands. + + ; rcx holds EEType pointer already + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation +LEAF_END RhNewString, _TEXT + + +;; Allocate one dimensional, zero based array (SZARRAY). +;; RCX == EEType +;; EDX == element count +LEAF_ENTRY RhpNewArray, _TEXT + + ; we want to limit the element count to the non-negative 32-bit int range + cmp rdx, 07fffffffh + ja ArraySizeOverflow + + ; save element count + mov r8, rdx + + ; Compute overall allocation size (align(base size + (element size * elements), 8)). + movzx eax, word ptr [rcx + OFFSETOF__EEType__m_usComponentSize] + mul rdx + mov edx, [rcx + OFFSETOF__EEType__m_uBaseSize] + add rax, rdx + add rax, 7 + and rax, -8 + + mov rdx, r8 + + ; rax == array size + ; rcx == EEType + ; rdx == element count + + INLINE_GETTHREAD r10, r8 + + mov r8, rax + add rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc RhpNewArrayRare + + ; rax == new alloc ptr + ; rcx == EEType + ; rdx == element count + ; r8 == array size + ; r10 == thread + cmp rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja RhpNewArrayRare + + mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax + + ; calc the new object pointer + sub rax, r8 + + mov [rax + OFFSETOF__Object__m_pEEType], rcx + mov [rax + OFFSETOF__Array__m_Length], edx + + ret + +ArraySizeOverflow: + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. + + ; rcx holds EEType pointer already + mov edx, 1 ; Indicate that we should throw OverflowException + jmp RhExceptionHandling_FailedAllocation +LEAF_END RhpNewArray, _TEXT + +NESTED_ENTRY RhpNewArrayRare, _TEXT + + ; rcx == EEType + ; rdx == element count + ; r8 == array size + + PUSH_COOP_PINVOKE_FRAME r9 + END_PROLOGUE + + ; r9: transition frame + + ; Preserve the EEType in RSI + mov rsi, rcx + ; Preserve the element count in RBX + mov rbx, rdx + ; Preserve the size in RDI + mov rdi, r8 + + ; passing EEType in rcx + xor rdx, rdx ; uFlags + ; pasing size in r8 + ; pasing pTransitionFrame in r9 + + ; Call the rest of the allocation helper. + ; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ; Set the new object's EEType pointer and length on success. 
+ test rax, rax + jz ArrayOutOfMemory + mov [rax + OFFSETOF__Object__m_pEEType], rsi + mov [rax + OFFSETOF__Array__m_Length], ebx + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + cmp rdi, RH_LARGE_OBJECT_SIZE + jb NewArray_SkipPublish + mov rcx, rax ;; rcx: object + mov rdx, rdi ;; rdx: object size + call RhpPublishObject ;; rax: this function returns the object that was passed-in +NewArray_SkipPublish: + + POP_COOP_PINVOKE_FRAME + ret + +ArrayOutOfMemory: + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov rcx, rsi ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + + jmp RhExceptionHandling_FailedAllocation + +NESTED_END RhpNewArrayRare, _TEXT + + + END diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/AsmMacros.inc b/src/coreclr/src/nativeaot/Runtime/amd64/AsmMacros.inc new file mode 100644 index 0000000000000..b20a6ba897105 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/AsmMacros.inc @@ -0,0 +1,418 @@ +;; +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. +;; + +include AsmOffsets.inc ; generated by the build from AsmOffsets.cpp + +;; +;; MACROS +;; + +; +; Define macros to build unwind data for prologues. +; + +push_nonvol_reg macro Reg + + .errnz ___STACK_ADJUSTMENT_FORBIDDEN, + + push Reg + .pushreg Reg + + endm + +push_vol_reg macro Reg + + .errnz ___STACK_ADJUSTMENT_FORBIDDEN, push_vol_reg cannot be used after save_reg_postrsp + + push Reg + .allocstack 8 + + endm + +push_imm macro imm + + .errnz ___STACK_ADJUSTMENT_FORBIDDEN, push_vol_reg cannot be used after save_reg_postrsp + + push imm + .allocstack 8 + + endm + +push_eflags macro + + .errnz ___STACK_ADJUSTMENT_FORBIDDEN, push_eflags cannot be used after save_reg_postrsp + + pushfq + .allocstack 8 + + endm + +alloc_stack macro Size + + .errnz ___STACK_ADJUSTMENT_FORBIDDEN, alloc_stack cannot be used after save_reg_postrsp + + sub rsp, Size + .allocstack Size + + endm + +save_reg_frame macro Reg, FrameReg, Offset + + .erre ___FRAME_REG_SET, save_reg_frame cannot be used before set_frame + + mov Offset[FrameReg], Reg + .savereg Reg, Offset + + endm + +save_reg_postrsp macro Reg, Offset + + .errnz ___FRAME_REG_SET, save_reg_postrsp cannot be used after set_frame + + mov Offset[rsp], Reg + .savereg Reg, Offset + + ___STACK_ADJUSTMENT_FORBIDDEN = 1 + + endm + +save_xmm128_frame macro Reg, FrameReg, Offset + + .erre ___FRAME_REG_SET, save_xmm128_frame cannot be used before set_frame + + movdqa Offset[FrameReg], Reg + .savexmm128 Reg, Offset + + endm + +save_xmm128_postrsp macro Reg, Offset + + .errnz ___FRAME_REG_SET, save_reg_postrsp cannot be used after set_frame + + movdqa Offset[rsp], Reg + .savexmm128 Reg, Offset + + ___STACK_ADJUSTMENT_FORBIDDEN = 1 + + endm + +set_frame macro Reg, Offset + + .errnz ___FRAME_REG_SET, set_frame cannot be used more than once + +if Offset + + lea Reg, Offset[rsp] + +else + + mov reg, rsp + +endif + + .setframe Reg, Offset + ___FRAME_REG_SET = 1 + + endm + +END_PROLOGUE macro + + .endprolog + + endm + +; +; Define function entry/end macros. 
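+; Illustrative usage (hypothetical helper, shown only to document the macro shape):
+;
+;     LEAF_ENTRY RhpExampleHelper, _TEXT
+;         mov     rax, rcx
+;         ret
+;     LEAF_END RhpExampleHelper, _TEXT
+;
+; NESTED_ENTRY/NESTED_END follow the same pattern but declare a frame (optionally with a
+; handler), so prologs built with the push_*/alloc_stack/set_frame macros above must be
+; terminated with END_PROLOGUE before the body begins.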
+; + +LEAF_ENTRY macro Name, Section + +Section segment para 'CODE' + + align 16 + + public Name +Name proc + + endm + +LEAF_END macro Name, section + +Name endp + +Section ends + + endm + +LEAF_END_MARKED macro Name, section + public Name&_End +Name&_End label qword + ; this nop is important to keep the label in + ; the right place in the face of BBT + nop + +Name endp + +Section ends + + endm + + +NESTED_ENTRY macro Name, Section, Handler + +Section segment para 'CODE' + + align 16 + + public Name + +ifb + +Name proc frame + +else + +Name proc frame:Handler + +endif + + ___FRAME_REG_SET = 0 + ___STACK_ADJUSTMENT_FORBIDDEN = 0 + + endm + +NESTED_END macro Name, section + +Name endp + +Section ends + + endm + +NESTED_END_MARKED macro Name, section + public Name&_End +Name&_End label qword + +Name endp + +Section ends + + endm + + +ALTERNATE_ENTRY macro Name + +Name label proc +PUBLIC Name + endm + +LABELED_RETURN_ADDRESS macro Name + +Name label proc +PUBLIC Name + endm + +EXPORT_POINTER_TO_ADDRESS macro Name + + local AddressToExport + +AddressToExport label proc + + .const + + align 8 + +Name dq offset AddressToExport + + public Name + + .code + + endm + +_tls_array equ 58h ;; offsetof(TEB, ThreadLocalStoragePointer) + +;; +;; __declspec(thread) version +;; +INLINE_GETTHREAD macro destReg, trashReg + EXTERN _tls_index : DWORD + EXTERN tls_CurrentThread:DWORD + +;; +;; construct 'eax' from 'rax' so that the register size and data size match +;; +;; BEWARE: currently only r10 is allowed as destReg from the r8-r15 set. +;; +ifidni , +destRegDWORD EQU r10d +else +destRegDWORD TEXTEQU @CatStr( e, @SubStr( destReg, 2, 2 ) ) +endif + + mov destRegDWORD, [_tls_index] + mov trashReg, gs:[_tls_array] + mov trashReg, [trashReg + destReg * 8] + mov destRegDWORD, SECTIONREL tls_CurrentThread + add destReg, trashReg + +endm + +INLINE_THREAD_UNHIJACK macro threadReg, trashReg1, trashReg2 + ;; + ;; Thread::Unhijack() + ;; + mov trashReg1, [threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress] + cmp trashReg1, 0 + je @F + + mov trashReg2, [threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + mov [trashReg2], trashReg1 + mov qword ptr [threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 + mov qword ptr [threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 + +@@: +endm + +DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP + +;; +;; Macro used from unmanaged helpers called from managed code where the helper does not transition immediately +;; into pre-emptive mode but may cause a GC and thus requires the stack is crawlable. This is typically the +;; case for helpers that meddle in GC state (e.g. allocation helpers) where the code must remain in +;; cooperative mode since it handles object references and internal GC state directly but a garbage collection +;; may be inevitable. In these cases we need to be able to transition to pre-meptive mode deep within the +;; unmanaged code but still be able to initialize the stack iterator at the first stack frame which may hold +;; interesting GC references. In all our helper cases this corresponds to the most recent managed frame (e.g. +;; the helper's caller). +;; +;; This macro builds a frame describing the current state of managed code. +;; +;; INVARIANTS +;; - The macro assumes it is called from a prolog, prior to a frame pointer being setup. +;; - All preserved registers remain unchanged from their values in managed code. 
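+;;
+;; The frame built below mirrors PInvokeTransitionFrame (see AsmOffsetsCpu.h); once the
+;; pushes complete, trashReg points at:
+;;
+;;     +00h  m_RIP            (return address into the managed caller)
+;;     +08h  m_FramePointer   (caller's RBP)
+;;     +10h  m_pThread        (scratch slot, unused by the stack walker)
+;;     +18h  m_Flags          (DEFAULT_FRAME_SAVE_FLAGS register bitmask)
+;;     +20h  m_PreservedRegs  (RBX, RSI, RDI, R12-R15, then the caller's RSP)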
+;; +PUSH_COOP_PINVOKE_FRAME macro trashReg + lea trashReg, [rsp + 8h] + push_vol_reg trashReg ; save caller's RSP + push_nonvol_reg r15 ; save preserved registers + push_nonvol_reg r14 ; .. + push_nonvol_reg r13 ; .. + push_nonvol_reg r12 ; .. + push_nonvol_reg rdi ; .. + push_nonvol_reg rsi ; .. + push_nonvol_reg rbx ; .. + push_imm DEFAULT_FRAME_SAVE_FLAGS ; save the register bitmask + push_vol_reg trashReg ; Thread * (unused by stackwalker) + push_nonvol_reg rbp ; save caller's RBP + mov trashReg, [rsp + 11*8] ; Find the return address + push_vol_reg trashReg ; save m_RIP + lea trashReg, [rsp + 0] ; trashReg == address of frame + + ;; allocate scratch space and any required alignment + alloc_stack 28h +endm + +;; +;; Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME +;; +POP_COOP_PINVOKE_FRAME macro + add rsp, 30h + pop rbp ; restore RBP + pop r10 ; discard thread + pop r10 ; discard bitmask + pop rbx + pop rsi + pop rdi + pop r12 + pop r13 + pop r14 + pop r15 + pop r10 ; discard caller RSP +endm + +; - TAILCALL_RAX: ("jmp rax") should be used for tailcalls, this emits an instruction +; sequence which is recognized by the unwinder as a valid epilogue terminator +TAILJMP_RAX TEXTEQU + +;; +;; CONSTANTS -- INTEGER +;; +TSF_Attached equ 01h +TSF_SuppressGcStress equ 08h +TSF_DoNotTriggerGc equ 10h + +;; +;; Rename fields of nested structs +;; +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + + + +;; GC type flags +GC_ALLOC_FINALIZE equ 1 + +;; Note: these must match the defs in PInvokeTransitionFrameFlags +PTFF_SAVE_RBX equ 00000001h +PTFF_SAVE_RSI equ 00000002h +PTFF_SAVE_RDI equ 00000004h +PTFF_SAVE_R12 equ 00000010h +PTFF_SAVE_R13 equ 00000020h +PTFF_SAVE_R14 equ 00000040h +PTFF_SAVE_R15 equ 00000080h +PTFF_SAVE_ALL_PRESERVED equ 000000F7h ;; NOTE: RBP is not included in this set! 
+PTFF_SAVE_RSP equ 00008000h +PTFF_SAVE_RAX equ 00000100h ;; RAX is saved if it contains a GC ref and we're in hijack handler +PTFF_SAVE_ALL_SCRATCH equ 00007F00h +PTFF_RAX_IS_GCREF equ 00010000h ;; iff PTFF_SAVE_RAX: set -> eax is Object, clear -> eax is scalar +PTFF_RAX_IS_BYREF equ 00020000h ;; iff PTFF_SAVE_RAX: set -> eax is ByRef, clear -> eax is Object or scalar +PTFF_THREAD_ABORT equ 00040000h ;; indicates that ThreadAbortException should be thrown when returning from the transition + +;; These must match the TrapThreadsFlags enum +TrapThreadsFlags_None equ 0 +TrapThreadsFlags_AbortInProgress equ 1 +TrapThreadsFlags_TrapThreads equ 2 + +;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +STATUS_REDHAWK_THREAD_ABORT equ 43h + +;; +;; CONSTANTS -- SYMBOLS +;; + +ifdef FEATURE_GC_STRESS +REDHAWKGCINTERFACE__STRESSGC equ ?StressGc@RedhawkGCInterface@@SAXXZ +THREAD__HIJACKFORGCSTRESS equ ?HijackForGcStress@Thread@@SAXPEAUPAL_LIMITED_CONTEXT@@@Z +endif ;; FEATURE_GC_STRESS + +;; +;; IMPORTS +;; + +EXTERN RhpGcAlloc : PROC +EXTERN RhpValidateExInfoPop : PROC +EXTERN RhDebugBreak : PROC +EXTERN RhpWaitForSuspend2 : PROC +EXTERN RhpWaitForGC2 : PROC +EXTERN RhpReversePInvokeAttachOrTrapThread2 : PROC +EXTERN RhExceptionHandling_FailedAllocation : PROC +EXTERN RhpPublishObject : PROC +EXTERN RhpCalculateStackTraceWorker : PROC +EXTERN RhThrowHwEx : PROC +EXTERN RhThrowEx : PROC +EXTERN RhRethrow : PROC +ifdef FEATURE_GC_STRESS +EXTERN REDHAWKGCINTERFACE__STRESSGC : PROC +EXTERN THREAD__HIJACKFORGCSTRESS : PROC +endif ;; FEATURE_GC_STRESS + +EXTERN g_lowest_address : QWORD +EXTERN g_highest_address : QWORD +EXTERN g_ephemeral_low : QWORD +EXTERN g_ephemeral_high : QWORD +EXTERN g_card_table : QWORD +EXTERN RhpTrapThreads : DWORD + diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/AsmOffsetsCpu.h b/src/coreclr/src/nativeaot/Runtime/amd64/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..2239433993f08 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/AsmOffsetsCpu.h @@ -0,0 +1,121 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. 
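+//
+// For illustration only: the PLAT_ASM_SIZEOF / PLAT_ASM_OFFSET entries below are consumed by
+// AsmOffsets.h (not part of this change), where each entry can be thought of as expanding into a
+// compile-time check of roughly this shape (hypothetical expansion, not the actual macro definition):
+//
+//     static_assert(offsetof(ExInfo, m_pPrevExInfo) == 0x0,  "asm offset out of sync with C++");
+//     static_assert(sizeof(ExInfo)                  == 0x260, "asm size out of sync with C++");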
+// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +#ifndef UNIX_AMD64_ABI +PLAT_ASM_SIZEOF(260, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(10, ExInfo, m_exception) +PLAT_ASM_OFFSET(18, ExInfo, m_kind) +PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(250, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(230, StackFrameIterator) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(228, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(100, PAL_LIMITED_CONTEXT) +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, Rsp) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, Rbp) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, Rdi) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, Rsi) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, Rax) +PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, Rbx) + +PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R12) +PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R13) +PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R14) +PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, R15) +PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, Xmm6) +PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, Xmm7) +PLAT_ASM_OFFSET(80, PAL_LIMITED_CONTEXT, Xmm8) +PLAT_ASM_OFFSET(90, PAL_LIMITED_CONTEXT, Xmm9) +PLAT_ASM_OFFSET(0a0, PAL_LIMITED_CONTEXT, Xmm10) +PLAT_ASM_OFFSET(0b0, PAL_LIMITED_CONTEXT, Xmm11) +PLAT_ASM_OFFSET(0c0, PAL_LIMITED_CONTEXT, Xmm12) +PLAT_ASM_OFFSET(0d0, PAL_LIMITED_CONTEXT, Xmm13) +PLAT_ASM_OFFSET(0e0, PAL_LIMITED_CONTEXT, Xmm14) +PLAT_ASM_OFFSET(0f0, PAL_LIMITED_CONTEXT, Xmm15) + +PLAT_ASM_SIZEOF(130, REGDISPLAY) +PLAT_ASM_OFFSET(78, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx) +PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp) +PLAT_ASM_OFFSET(28, REGDISPLAY, pRsi) +PLAT_ASM_OFFSET(30, REGDISPLAY, pRdi) +PLAT_ASM_OFFSET(58, REGDISPLAY, pR12) +PLAT_ASM_OFFSET(60, REGDISPLAY, pR13) +PLAT_ASM_OFFSET(68, REGDISPLAY, pR14) +PLAT_ASM_OFFSET(70, REGDISPLAY, pR15) +PLAT_ASM_OFFSET(90, REGDISPLAY, Xmm) + +#else // !UNIX_AMD64_ABI + +PLAT_ASM_SIZEOF(1a8, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(10, ExInfo, m_exception) +PLAT_ASM_OFFSET(18, ExInfo, m_kind) +PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(1a0, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(180, StackFrameIterator) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(178, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(50, PAL_LIMITED_CONTEXT) +PLAT_ASM_OFFSET(0, 
PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, Rsp) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, Rbp) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, Rax) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, Rbx) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, Rdx) + +PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, R12) +PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R13) +PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R14) +PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R15) + +PLAT_ASM_SIZEOF(90, REGDISPLAY) +PLAT_ASM_OFFSET(78, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx) +PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp) +PLAT_ASM_OFFSET(28, REGDISPLAY, pRsi) +PLAT_ASM_OFFSET(30, REGDISPLAY, pRdi) +PLAT_ASM_OFFSET(58, REGDISPLAY, pR12) +PLAT_ASM_OFFSET(60, REGDISPLAY, pR13) +PLAT_ASM_OFFSET(68, REGDISPLAY, pR14) +PLAT_ASM_OFFSET(70, REGDISPLAY, pR15) + +#endif // !UNIX_AMD64_ABI diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.S b/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.S new file mode 100644 index 0000000000000..483d4b5f9ac8c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.S @@ -0,0 +1,13 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +NESTED_ENTRY RhCallDescrWorker, _TEXT, NoHandler + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + // UNIXTODO: Implement this function + int 3 +NESTED_END RhCallDescrWorker, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.asm b/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.asm new file mode 100644 index 0000000000000..85c8e2dd52c5c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/CallDescrWorker.asm @@ -0,0 +1,105 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + + +;;;;;;;;;;;;;;;;;;;;;;; CallingConventionConverter Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;extern "C" void RhCallDescrWorker(CallDescrData * pCallDescrData); + + NESTED_ENTRY RhCallDescrWorker, _TEXT + + push_nonvol_reg rbx ; save nonvolatile registers + push_nonvol_reg rsi ; + push_nonvol_reg rbp ; + set_frame rbp, 0 ; set frame pointer + + END_PROLOGUE + + mov rbx, rcx ; save pCallDescrData in rbx + + mov ecx, dword ptr [rbx + OFFSETOF__CallDescrData__numStackSlots] + + test ecx, 1 + jz StackAligned + push rax +StackAligned: + + mov rsi, [rbx + OFFSETOF__CallDescrData__pSrc] ; set source argument list address + lea rsi, [rsi + 8 * rcx] + +StackCopyLoop: ; copy the arguments to stack top-down to carefully probe for sufficient + ; stack space + sub rsi, 8 + push qword ptr [rsi] + dec ecx + jnz StackCopyLoop + + ; + ; N.B. All four argument registers are loaded regardless of the actual number + ; of arguments. 
+ ; + + mov rax, [rbx + OFFSETOF__CallDescrData__pFloatArgumentRegisters] ; get floating pointer arg registers pointer + + mov rcx, 0[rsp] ; load first four argument registers + mov rdx, 8[rsp] ; + mov r8, 10h[rsp] ; + mov r9, 18h[rsp] ; + test rax, rax ; + jz DoCall ; + movdqa xmm0, [rax + 00h] ; load floating point registers if they are used + movdqa xmm1, [rax + 10h] ; + movdqa xmm2, [rax + 20h] ; + movdqa xmm3, [rax + 30h] ; +DoCall: + call qword ptr [rbx + OFFSETOF__CallDescrData__pTarget] ; call target function + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + ; Symbol used to identify thunk call to managed function so the special + ; case unwinder can unwind through this function. Sadly we cannot directly + ; export this symbol right now because it confuses DIA unwinder to believe + ; it's the beginning of a new method, therefore we export the address + ; of an auxiliary variable holding the address instead. + + ; Save FP return value + + mov ecx, dword ptr [rbx + OFFSETOF__CallDescrData__fpReturnSize] + test ecx, ecx + jz ReturnsInt + + cmp ecx, 4 + je ReturnsFloat + cmp ecx, 8 + je ReturnsDouble + ; unexpected + jmp Epilog + +ReturnsInt: + mov rbx, [rbx + OFFSETOF__CallDescrData__pReturnBuffer] + mov [rbx], rax + +Epilog: + lea rsp, 0[rbp] ; deallocate argument list + pop rbp ; restore nonvolatile register + pop rsi ; + pop rbx ; + ret + +ReturnsFloat: +; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + mov rbx, [rbx + OFFSETOF__CallDescrData__pReturnBuffer] + movss real4 ptr [rbx], xmm0 + jmp Epilog + +ReturnsDouble: +; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + mov rbx, [rbx + OFFSETOF__CallDescrData__pReturnBuffer] + movsd real8 ptr [rbx], xmm0 + jmp Epilog + + NESTED_END RhCallDescrWorker, _TEXT + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.S b/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.S new file mode 100644 index 0000000000000..2a55819057212 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.S @@ -0,0 +1,57 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +// +// void CallingConventionConverter_ReturnVoidReturnThunk() +// +LEAF_ENTRY CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + ret +LEAF_END CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + +// +// int CallingConventionConverter_ReturnIntegerReturnThunk(int) +// +LEAF_ENTRY CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + // UNIXTODO: Implement this function + int 3 +LEAF_END CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + +// +// Note: The "__jmpstub__" prefix is used to indicate to debugger +// that it must step-through this stub when it encounters it while +// stepping. 
+// + +// __jmpstub__CallingConventionConverter_CommonCallingStub +// +// +// struct CallingConventionConverter_CommonCallingStub_PointerData +// { +// void *ManagedCallConverterThunk; +// void *UniversalThunk; +// } +// +// struct CommonCallingStubInputData +// { +// ULONG_PTR CallingConventionId; +// CallingConventionConverter_CommonCallingStub_PointerData *commonData; +// } +// +// r10 - Points at CommonCallingStubInputData +// +// +LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + // UNIXTODO: Implement this function + int 3 +LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + +// +// void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonStub) +// +LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + // UNIXTODO: Implement this function + int 3 +LEAF_END CallingConventionConverter_GetStubs, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.asm b/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.asm new file mode 100644 index 0000000000000..a29d2202e5b62 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/CallingConventionConverterHelpers.asm @@ -0,0 +1,85 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; ----------------------------------------------------------------------------------------------------------- +;; #include "asmmacros.inc" +;; ----------------------------------------------------------------------------------------------------------- + +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + align 16 + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + +; - TAILCALL_RAX: ("jmp rax") should be used for tailcalls, this emits an instruction +; sequence which is recognized by the unwinder as a valid epilogue terminator +TAILJMP_RAX TEXTEQU +POINTER_SIZE equ 08h + +;; +;; void CallingConventionConverter_ReturnVoidReturnThunk() +;; +LEAF_ENTRY CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + ret +LEAF_END CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + +;; +;; int CallingConventionConverter_ReturnIntegerReturnThunk(int) +;; +LEAF_ENTRY CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + mov rax, rcx + ret +LEAF_END CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + +;; +;; Note: The "__jmpstub__" prefix is used to indicate to debugger +;; that it must step-through this stub when it encounters it while +;; stepping. 
+;; + +;; __jmpstub__CallingConventionConverter_CommonCallingStub +;; +;; +;; struct CallingConventionConverter_CommonCallingStub_PointerData +;; { +;; void *ManagedCallConverterThunk; +;; void *UniversalThunk; +;; } +;; +;; struct CommonCallingStubInputData +;; { +;; ULONG_PTR CallingConventionId; +;; CallingConventionConverter_CommonCallingStub_PointerData *commonData; +;; } +;; +;; r10 - Points at CommonCallingStubInputData +;; +;; +LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + mov r11, [r10] ; put CallingConventionId into r11 as "parameter" to universal transition thunk + mov r10, [r10 + POINTER_SIZE] ; get pointer to CallingConventionConverter_CommonCallingStub_PointerData into r10 + mov rax, [r10 + POINTER_SIZE] ; get address of UniversalTransitionThunk + mov r10, [r10] ; get address of ManagedCallConverterThunk + TAILJMP_RAX +LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + +;; +;; void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonStub) +;; +LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + lea rax, [CallingConventionConverter_ReturnVoidReturnThunk] + mov [rcx], rax + lea rax, [CallingConventionConverter_ReturnIntegerReturnThunk] + mov [rdx], rax + lea rax, [__jmpstub__CallingConventionConverter_CommonCallingStub] + mov [r8], rax + ret +LEAF_END CallingConventionConverter_GetStubs, _TEXT + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.S b/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.S new file mode 100644 index 0000000000000..6c67859090d04 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.S @@ -0,0 +1,534 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
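+//
+// This is the Unix (System V AMD64 ABI) flavor of the exception handling helpers; as the per-routine
+// headers below note, arguments arrive in RDI/RSI/RDX/RCX here, whereas the Windows flavor in
+// ExceptionHandling.asm receives them in RCX/RDX/R8/R9.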
+ +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpThrowHwEx +// +// INPUT: RDI: exception code of fault +// RSI: faulting RIP +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler + + STACKSIZEOF_ExInfo = ((SIZEOF__ExInfo + 15) & (~15)) + rsp_offsetof_Context = STACKSIZEOF_ExInfo + + mov rax, rsp // save the faulting RSP + + // Align the stack towards zero + and rsp, -16 + + xor rdx, rdx + +// struct PAL_LIMITED_CONTEXT +// { + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_register rdx // rdx set to 0 + push_nonvol_reg rbx + push_register rdx // rax set to 0 + push_nonvol_reg rbp + push_register rax // faulting RSP + push_register rsi // faulting IP +// } + // allocate outgoing args area and space for the ExInfo + alloc_stack STACKSIZEOF_ExInfo + + END_PROLOGUE + + mov rbx, rdi + INLINE_GETTHREAD + mov rdi, rbx + + mov rsi, rsp // rsi <- ExInfo* + + xor rdx, rdx + mov [rsi + OFFSETOF__ExInfo__m_exception], rdx // init the exception object to null + mov byte ptr [rsi + OFFSETOF__ExInfo__m_passNumber], 1 // init to the first pass + mov dword ptr [rsi + OFFSETOF__ExInfo__m_idxCurClause], 0xFFFFFFFF + mov byte ptr [rsi + OFFSETOF__ExInfo__m_kind], 2 // ExKind.HardwareFault + + // link the ExInfo into the thread's ExInfo chain + mov rdx, [rax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [rsi + OFFSETOF__ExInfo__m_pPrevExInfo], rdx // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rsi // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + lea rdx, [rsp + rsp_offsetof_Context] // rdx <- PAL_LIMITED_CONTEXT* + mov [rsi + OFFSETOF__ExInfo__m_pExContext], rdx // init ExInfo.m_pExContext + + // rdi still contains the exception code + // rsi contains the address of the ExInfo + call EXTERNAL_C_FUNC(RhThrowHwEx) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + // no return + int 3 + +NESTED_END RhpThrowHwEx, _TEXT + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpThrowEx +// +// INPUT: RDI: exception object +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler + + STACKSIZEOF_ExInfo = ((SIZEOF__ExInfo + 15) & (~ 15)) + rsp_offsetof_Context = STACKSIZEOF_ExInfo + + lea rax, [rsp+8] // save the RSP of the throw site + mov rsi, [rsp] // get return address + + xor rdx, rdx + push_register rdx // padding + +// struct PAL_LIMITED_CONTEXT +// { + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_register rdx // rdx set to 0 + push_nonvol_reg rbx + push_register rdx // rax set to 0 + push_nonvol_reg rbp + push_register rax // 'faulting' RSP + push_register rsi // 'faulting' IP +// } + + // allocate space for the ExInfo + alloc_stack STACKSIZEOF_ExInfo + + END_PROLOGUE + + mov rbx, rdi + INLINE_GETTHREAD + mov rdi, rbx + + lea rbx, [rsp + rsp_offsetof_Context + SIZEOF__PAL_LIMITED_CONTEXT + 0x8] // rbx <- addr of return address + + // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. 
So the return + // address could have been hijacked when we were in that C# code and we must remove the hijack and + // reflect the correct return address in our exception context record. The other throw helpers don't + // need this because they cannot be tail-called from C#. + INLINE_THREAD_UNHIJACK rax, rcx, rsi // trashes RCX, RSI + mov rsi, [rbx] // rdx <- return address + mov [rsp + rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP], rsi // set 'faulting' IP after unhijack + + mov rsi, rsp // rsi <- ExInfo* + + mov [rsi + OFFSETOF__ExInfo__m_exception], rdx // init the exception object to null + mov byte ptr [rsi + OFFSETOF__ExInfo__m_passNumber], 1 // init to the first pass + mov dword ptr [rsi + OFFSETOF__ExInfo__m_idxCurClause], 0xFFFFFFFF + mov byte ptr [rsi + OFFSETOF__ExInfo__m_kind], 1 // ExKind.Throw + + // link the ExInfo into the thread's ExInfo chain + mov rdx, [rax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [rsi + OFFSETOF__ExInfo__m_pPrevExInfo], rdx // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rsi // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + lea rdx, [rsp + rsp_offsetof_Context] // rdx <- PAL_LIMITED_CONTEXT* + mov [rsi + OFFSETOF__ExInfo__m_pExContext], rdx // init ExInfo.m_pExContext + + // rdi still contains the exception object + // rsi contains the address of the ExInfo + call EXTERNAL_C_FUNC(RhThrowEx) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + // no return + int 3 + +NESTED_END RhpThrowEx, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void FASTCALL RhpRethrow() +// +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +// +// INPUT: +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpRethrow, _TEXT, NoHandler + + STACKSIZEOF_ExInfo = ((SIZEOF__ExInfo + 15) & (~ 15)) + rsp_offsetof_Context = STACKSIZEOF_ExInfo + + lea rax, [rsp+8] // save the RSP of the throw site + mov rsi, [rsp] // get return address + + xor rdx, rdx + push_register rdx // padding + +// struct PAL_LIMITED_CONTEXT +// { + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_register rdx // rdx set to 0 + push_nonvol_reg rbx + push_register rdx // rax set to 0 + push_nonvol_reg rbp + push_register rax // 'faulting' RSP + push_register rsi // 'faulting' IP +// } + + // allocate space for the ExInfo + alloc_stack STACKSIZEOF_ExInfo + + END_PROLOGUE + + INLINE_GETTHREAD + + mov rsi, rsp // rsi <- ExInfo* + + mov [rsi + OFFSETOF__ExInfo__m_exception], rdx // init the exception object to null + mov byte ptr [rsi + OFFSETOF__ExInfo__m_passNumber], 1 // init to the first pass + mov dword ptr [rsi + OFFSETOF__ExInfo__m_idxCurClause], 0xFFFFFFFF + mov byte ptr [rsi + OFFSETOF__ExInfo__m_kind], 0 // init to a deterministic value (ExKind.None) + + + // link the ExInfo into the thread's ExInfo chain + mov rdi, [rax + OFFSETOF__Thread__m_pExInfoStackHead] // rdi <- currently active ExInfo + mov [rsi + OFFSETOF__ExInfo__m_pPrevExInfo], rdi // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rsi // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + lea rdx, [rsp + rsp_offsetof_Context] // rdx <- PAL_LIMITED_CONTEXT* + mov [rsi + OFFSETOF__ExInfo__m_pExContext], rdx 
// init ExInfo.m_pExContext + + // rdi contains the currently active ExInfo + // rsi contains the address of the new ExInfo + call EXTERNAL_C_FUNC(RhRethrow) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + // no return + int 3 + +NESTED_END RhpRethrow, _TEXT + +// +// Prologue of all funclet calling helpers (RhpCallXXXXFunclet) +// +.macro FUNCLET_CALL_PROLOGUE localsCount, alignStack + + push_nonvol_reg r15 // save preserved regs for OS stackwalker + push_nonvol_reg r14 // ... + push_nonvol_reg r13 // ... + push_nonvol_reg r12 // ... + push_nonvol_reg rbx // ... + push_nonvol_reg rbp // ... + + stack_alloc_size = \localsCount * 8 + \alignStack * 8 + + alloc_stack stack_alloc_size + + END_PROLOGUE +.endm + +// +// Epilogue of all funclet calling helpers (RhpCallXXXXFunclet) +// +.macro FUNCLET_CALL_EPILOGUE + free_stack stack_alloc_size + + pop_nonvol_reg rbp + pop_nonvol_reg rbx + pop_nonvol_reg r12 + pop_nonvol_reg r13 + pop_nonvol_reg r14 + pop_nonvol_reg r15 +.endm + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo) +// +// INPUT: RDI: exception object +// RSI: handler funclet address +// RDX: REGDISPLAY* +// RCX: ExInfo* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler + + FUNCLET_CALL_PROLOGUE 6, 1 + + locThread = 0 + locResumeIp = 8 + locArg0 = 0x10 + locArg1 = 0x18 + locArg2 = 0x20 + locArg3 = 0x28 + + mov [rsp + locArg0], rdi // save arguments for later + mov [rsp + locArg1], rsi + mov [rsp + locArg2], rdx + mov [rsp + locArg3], rcx + + mov rbx, rdx + INLINE_GETTHREAD + mov rdx, rbx + + mov [rsp + locThread], rax // save Thread* for later + + // Clear the DoNotTriggerGc state before calling out to our managed catch funclet. + lock and dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], ~TSF_DoNotTriggerGc + + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + +#if 0 // _DEBUG // @TODO: temporarily removed because trashing RBP breaks the debugger + // trash the values at the old homes to make sure nobody uses them + mov rcx, 0xbaaddeed + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov [rax], rcx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov [rax], rcx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov [rax], rcx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov [rax], rcx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov [rax], rcx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov [rax], rcx +#endif + + mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame + mov rsi, [rsp + locArg0] // rsi <- exception object + call qword ptr [rsp + locArg1] // call handler funclet + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + mov rdx, [rsp + locArg2] // rdx <- dispatch context + +#ifdef _DEBUG + // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we + // have to spill all the preserved registers and then refill them after the call. 
+ + mov [rsp + locResumeIp], rax // save resume IP for later + + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov [rdi] , rbx + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov [rdi] , rbp + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov [rdi] , r12 + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov [rdi] , r13 + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov [rdi] , r14 + mov rdi, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov [rdi] , r15 + + mov rdi, [rsp] // rdi <- Thread* + mov rsi, [rsp + locArg3] // rsi <- current ExInfo * + mov rdx, [rdx + OFFSETOF__REGDISPLAY__SP] // rdx <- resume SP value + call C_FUNC(RhpValidateExInfoPop) + + mov rdx, [rsp + locArg2] // rdx <- dispatch context + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + + mov rax, [rsp + locResumeIp] // reload resume IP +#endif + mov rsi, [rsp + locThread] // rsi <- Thread* + + // We must unhijack the thread at this point because the section of stack where the hijack is applied + // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. + INLINE_THREAD_UNHIJACK rsi, rdi, rcx // Thread in rsi, trashes rdi and rcx + + mov rdi, [rsp + locArg3] // rdi <- current ExInfo * + mov rdx, [rdx + OFFSETOF__REGDISPLAY__SP] // rdx <- resume SP value + xor ecx, ecx // rcx <- 0 + +LOCAL_LABEL(ExInfoLoop): + mov rdi, [rdi + OFFSETOF__ExInfo__m_pPrevExInfo] // rdi <- next ExInfo + cmp rdi, rcx + je LOCAL_LABEL(ExInfoLoopDone) // we're done if it's null + cmp rdi, rdx + jl LOCAL_LABEL(ExInfoLoop) // keep looping if it's lower than the new SP + +LOCAL_LABEL(ExInfoLoopDone): + mov [rsi + OFFSETOF__Thread__m_pExInfoStackHead], rdi // store the new head on the Thread + + // reset RSP and jump to the continuation address + mov rsp, rdx // reset the SP + jmp rax + +NESTED_END RhpCallCatchFunclet, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +// +// INPUT: RDI: handler funclet address +// RSI: REGDISPLAY* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler + + FUNCLET_CALL_PROLOGUE 3, 0 + + locThread = 0 + locArg0 = 8 + locArg1 = 0x10 + + mov [rsp + locArg0], rdi // save arguments for later + mov [rsp + locArg1], rsi + + mov rbx, rsi + INLINE_GETTHREAD + mov rsi, rbx + + mov [rsp + locThread], rax // save Thread* for later + + // + // We want to suppress hijacking between invocations of subsequent finallys. We do this because we + // cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the + // method) and then been popped off the stack, leaving behind no trace of its effect. + // + // So we clear the state before and set it after invocation of the handler. 
+ // + lock and dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], ~TSF_DoNotTriggerGc + + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + +#if 0 // _DEBUG // @TODO: temporarily removed because trashing RBP breaks the debugger + // trash the values at the old homes to make sure nobody uses them + mov rcx, 0xbaaddeed + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbx] + mov [rax], rcx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbp] + mov [rax], rcx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR12] + mov [rax], rcx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR13] + mov [rax], rcx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR14] + mov [rax], rcx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] + mov [rax], rcx +#endif + + mov rdi, [rsi + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame + call qword ptr [rsp + locArg0] // handler funclet address + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + mov rsi, [rsp + locArg1] // rsi <- regdisplay + + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbx] + mov [rax] , rbx + mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbp] + mov [rax] , rbp + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR12] + mov [rax] , r12 + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR13] + mov [rax] , r13 + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR14] + mov [rax] , r14 + mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] + mov [rax] , r15 + + mov rax, [rsp + locThread] // rax <- Thread* + lock or dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + + FUNCLET_CALL_EPILOGUE + + ret + +NESTED_END RhpCallFinallyFunclet, _TEXT + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +// +// INPUT: RDI: exception object +// RSI: filter funclet address +// RDX: REGDISPLAY* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler + + FUNCLET_CALL_PROLOGUE 0, 1 + + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + + mov rax, rsi // rax <- handler funclet address + mov rsi, rdi // rsi <- exception object + mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame + call rax + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + // RAX contains the result of the filter execution + + FUNCLET_CALL_EPILOGUE + + ret + +NESTED_END RhpCallFilterFunclet, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.asm b/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.asm new file mode 100644 index 0000000000000..5e7ed52f1d375 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/ExceptionHandling.asm @@ -0,0 +1,679 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
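+;;
+;; This is the Windows (MSVC x64 ABI) flavor of the exception handling helpers: arguments arrive in
+;; RCX/RDX/R8/R9 (see the per-routine headers below), and because XMM6-XMM15 are callee-saved in this
+;; ABI, the prologs below also save and restore those registers, unlike the Unix flavor in
+;; ExceptionHandling.S.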
+ +include asmmacros.inc + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowHwEx +;; +;; INPUT: RCX: exception code of fault +;; RDX: faulting RIP +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpThrowHwEx, _TEXT + + SIZEOF_XmmSaves equ SIZEOF__PAL_LIMITED_CONTEXT - OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + STACKSIZEOF_ExInfo equ ((SIZEOF__ExInfo + 15) AND (NOT 15)) + + SIZEOF_OutgoingScratch equ 20h + rsp_offsetof_ExInfo equ SIZEOF_OutgoingScratch + rsp_offsetof_Context equ SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + mov rax, rsp ;; save the faulting RSP + + ;; Align the stack towards zero + and rsp, -16 + + ;; Push the expected "machine frame" for the unwinder to see. All that it looks at is the faulting + ;; RSP and RIP, so we push zero for the others. + xor r8, r8 + push r8 ;; SS + push rax ;; faulting RSP + pushfq ;; EFLAGS + push r8 ;; CS + push rdx ;; faulting RIP + + ; Tell the unwinder that the frame is there now + .pushframe + + alloc_stack SIZEOF_XmmSaves + 8h ;; reserve stack for the xmm saves (+8h to realign stack) + push_vol_reg r8 ;; padding + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_nonvol_reg rbx + push_vol_reg r8 + push_nonvol_reg rsi + push_nonvol_reg rdi + push_nonvol_reg rbp + push_vol_reg rax ;; faulting RSP + push_vol_reg rdx ;; faulting IP + + ;; allocate outgoing args area and space for the ExInfo + alloc_stack SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + save_xmm128_postrsp Xmm6 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + save_xmm128_postrsp Xmm7 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm7 + save_xmm128_postrsp Xmm8 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm8 + save_xmm128_postrsp Xmm9 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm9 + save_xmm128_postrsp Xmm10, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm10 + save_xmm128_postrsp Xmm11, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm11 + save_xmm128_postrsp Xmm12, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm12 + save_xmm128_postrsp Xmm13, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm13 + save_xmm128_postrsp Xmm14, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm14 + save_xmm128_postrsp Xmm15, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm15 + + END_PROLOGUE + + INLINE_GETTHREAD rax, rbx ;; rax <- Thread*, rbx is trashed + + lea rdx, [rsp + rsp_offsetof_ExInfo] ;; rdx <- ExInfo* + + xor r8, r8 + mov [rdx + OFFSETOF__ExInfo__m_exception], r8 ;; init the exception object to null + mov byte ptr [rdx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [rdx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [rdx + OFFSETOF__ExInfo__m_kind], 2 ;; ExKind.HardwareFault + + ;; link the ExInfo into the thread's ExInfo chain + mov r8, [rax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [rdx + OFFSETOF__ExInfo__m_pPrevExInfo], r8 ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rdx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea r8, [rsp + rsp_offsetof_Context] ;; r8 <- PAL_LIMITED_CONTEXT* + mov [rdx + OFFSETOF__ExInfo__m_pExContext], r8 ;; init ExInfo.m_pExContext + + ;; rcx still contains the exception code + ;; rdx contains the 
address of the ExInfo + call RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + ;; no return + int 3 + +NESTED_END RhpThrowHwEx, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowEx +;; +;; INPUT: RCX: exception object +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpThrowEx, _TEXT + + SIZEOF_XmmSaves equ SIZEOF__PAL_LIMITED_CONTEXT - OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + STACKSIZEOF_ExInfo equ ((SIZEOF__ExInfo + 15) AND (NOT 15)) + + SIZEOF_OutgoingScratch equ 20h + rsp_offsetof_ExInfo equ SIZEOF_OutgoingScratch + rsp_offsetof_Context equ SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + lea rax, [rsp+8] ;; save the RSP of the throw site + mov rdx, [rsp] ;; get return address + + xor r8, r8 + + alloc_stack SIZEOF_XmmSaves + 8h ;; reserve stack for the xmm saves (+8h to realign stack) + push_vol_reg r8 ;; padding + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_nonvol_reg rbx + push_vol_reg r8 + push_nonvol_reg rsi + push_nonvol_reg rdi + push_nonvol_reg rbp + push_vol_reg rax ;; 'faulting' RSP + push_vol_reg rdx ;; 'faulting' IP + + ;; allocate outgoing args area and space for the ExInfo + alloc_stack SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + save_xmm128_postrsp Xmm6 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + save_xmm128_postrsp Xmm7 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm7 + save_xmm128_postrsp Xmm8 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm8 + save_xmm128_postrsp Xmm9 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm9 + save_xmm128_postrsp Xmm10, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm10 + save_xmm128_postrsp Xmm11, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm11 + save_xmm128_postrsp Xmm12, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm12 + save_xmm128_postrsp Xmm13, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm13 + save_xmm128_postrsp Xmm14, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm14 + save_xmm128_postrsp Xmm15, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm15 + + END_PROLOGUE + + INLINE_GETTHREAD rax, rbx ;; rax <- Thread*, rbx is trashed + + lea rbx, [rsp + rsp_offsetof_Context + SIZEOF__PAL_LIMITED_CONTEXT + 8h] ;; rbx <- addr of return address + + ;; There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + ;; address could have been hijacked when we were in that C# code and we must remove the hijack and + ;; reflect the correct return address in our exception context record. The other throw helpers don't + ;; need this because they cannot be tail-called from C#. 
+ INLINE_THREAD_UNHIJACK rax, r9, rdx ;; trashes R9, RDX + mov rdx, [rbx] ;; rdx <- return address + mov [rsp + rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP], rdx ;; set 'faulting' IP after unhijack + + lea rdx, [rsp + rsp_offsetof_ExInfo] ;; rdx <- ExInfo* + + mov [rdx + OFFSETOF__ExInfo__m_exception], r8 ;; init the exception object to null + mov byte ptr [rdx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [rdx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [rdx + OFFSETOF__ExInfo__m_kind], 1 ;; ExKind.Throw + + ;; link the ExInfo into the thread's ExInfo chain + mov r8, [rax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [rdx + OFFSETOF__ExInfo__m_pPrevExInfo], r8 ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rdx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea r8, [rsp + rsp_offsetof_Context] ;; r8 <- PAL_LIMITED_CONTEXT* + mov [rdx + OFFSETOF__ExInfo__m_pExContext], r8 ;; init ExInfo.m_pExContext + + ;; rcx still contains the exception object + ;; rdx contains the address of the ExInfo + call RhThrowEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + ;; no return + int 3 + +NESTED_END RhpThrowEx, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpRethrow() +;; +;; SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +;; +;; INPUT: +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpRethrow, _TEXT + + SIZEOF_XmmSaves equ SIZEOF__PAL_LIMITED_CONTEXT - OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + STACKSIZEOF_ExInfo equ ((SIZEOF__ExInfo + 15) AND (NOT 15)) + + SIZEOF_OutgoingScratch equ 20h + rsp_offsetof_ExInfo equ SIZEOF_OutgoingScratch + rsp_offsetof_Context equ SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + lea rax, [rsp+8] ;; save the RSP of the throw site + mov rdx, [rsp] ;; get return address + + xor r8, r8 + + alloc_stack SIZEOF_XmmSaves + 8h ;; reserve stack for the xmm saves (+8h to realign stack) + push_vol_reg r8 ;; padding + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_nonvol_reg rbx + push_vol_reg r8 + push_nonvol_reg rsi + push_nonvol_reg rdi + push_nonvol_reg rbp + push_vol_reg rax ;; 'faulting' RSP + push_vol_reg rdx ;; 'faulting' IP + + ;; allocate outgoing args area and space for the ExInfo + alloc_stack SIZEOF_OutgoingScratch + STACKSIZEOF_ExInfo + + save_xmm128_postrsp Xmm6 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 + save_xmm128_postrsp Xmm7 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm7 + save_xmm128_postrsp Xmm8 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm8 + save_xmm128_postrsp Xmm9 , rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm9 + save_xmm128_postrsp Xmm10, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm10 + save_xmm128_postrsp Xmm11, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm11 + save_xmm128_postrsp Xmm12, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm12 + save_xmm128_postrsp Xmm13, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm13 + save_xmm128_postrsp Xmm14, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm14 + save_xmm128_postrsp Xmm15, rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__Xmm15 + + END_PROLOGUE + + 
INLINE_GETTHREAD rax, rbx ;; rax <- Thread*, rbx is trashed + + lea rdx, [rsp + rsp_offsetof_ExInfo] ;; rdx <- ExInfo* + + mov [rdx + OFFSETOF__ExInfo__m_exception], r8 ;; init the exception object to null + mov byte ptr [rdx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [rdx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [rdx + OFFSETOF__ExInfo__m_kind], 0 ;; init to a deterministic value (ExKind.None) + + + ;; link the ExInfo into the thread's ExInfo chain + mov rcx, [rax + OFFSETOF__Thread__m_pExInfoStackHead] ;; rcx <- currently active ExInfo + mov [rdx + OFFSETOF__ExInfo__m_pPrevExInfo], rcx ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [rax + OFFSETOF__Thread__m_pExInfoStackHead], rdx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea r8, [rsp + rsp_offsetof_Context] ;; r8 <- PAL_LIMITED_CONTEXT* + mov [rdx + OFFSETOF__ExInfo__m_pExContext], r8 ;; init ExInfo.m_pExContext + + ;; rcx contains the currently active ExInfo + ;; rdx contains the address of the new ExInfo + call RhRethrow + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + ;; no return + int 3 + +NESTED_END RhpRethrow, _TEXT + +;; +;; Prologue of all funclet calling helpers (RhpCallXXXXFunclet) +;; +FUNCLET_CALL_PROLOGUE macro localsCount, alignStack + + push_nonvol_reg r15 ;; save preserved regs for OS stackwalker + push_nonvol_reg r14 ;; ... + push_nonvol_reg r13 ;; ... + push_nonvol_reg r12 ;; ... + push_nonvol_reg rbx ;; ... + push_nonvol_reg rsi ;; ... + push_nonvol_reg rdi ;; ... + push_nonvol_reg rbp ;; ... + + arguments_scratch_area_size = 20h + xmm_save_area_size = 10 * 10h ;; xmm6..xmm15 save area + stack_alloc_size = arguments_scratch_area_size + localsCount * 8 + alignStack * 8 + xmm_save_area_size + rsp_offsetof_arguments = stack_alloc_size + 8*8h + 8h + rsp_offsetof_locals = arguments_scratch_area_size + xmm_save_area_size + + alloc_stack stack_alloc_size + + save_xmm128_postrsp xmm6, (arguments_scratch_area_size + 0 * 10h) + save_xmm128_postrsp xmm7, (arguments_scratch_area_size + 1 * 10h) + save_xmm128_postrsp xmm8, (arguments_scratch_area_size + 2 * 10h) + save_xmm128_postrsp xmm9, (arguments_scratch_area_size + 3 * 10h) + save_xmm128_postrsp xmm10, (arguments_scratch_area_size + 4 * 10h) + save_xmm128_postrsp xmm11, (arguments_scratch_area_size + 5 * 10h) + save_xmm128_postrsp xmm12, (arguments_scratch_area_size + 6 * 10h) + save_xmm128_postrsp xmm13, (arguments_scratch_area_size + 7 * 10h) + save_xmm128_postrsp xmm14, (arguments_scratch_area_size + 8 * 10h) + save_xmm128_postrsp xmm15, (arguments_scratch_area_size + 9 * 10h) + + END_PROLOGUE +endm + +;; +;; Epilogue of all funclet calling helpers (RhpCallXXXXFunclet) +;; +FUNCLET_CALL_EPILOGUE macro + movdqa xmm6, [rsp + arguments_scratch_area_size + 0 * 10h] + movdqa xmm7, [rsp + arguments_scratch_area_size + 1 * 10h] + movdqa xmm8, [rsp + arguments_scratch_area_size + 2 * 10h] + movdqa xmm9, [rsp + arguments_scratch_area_size + 3 * 10h] + movdqa xmm10, [rsp + arguments_scratch_area_size + 4 * 10h] + movdqa xmm11, [rsp + arguments_scratch_area_size + 5 * 10h] + movdqa xmm12, [rsp + arguments_scratch_area_size + 6 * 10h] + movdqa xmm13, [rsp + arguments_scratch_area_size + 7 * 10h] + movdqa xmm14, [rsp + arguments_scratch_area_size + 8 * 10h] + movdqa xmm15, [rsp + arguments_scratch_area_size + 9 * 10h] + + add rsp, stack_alloc_size + pop rbp + pop rdi + pop rsi + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 +endm + 
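+;;
+;; As a worked example of the layout produced by FUNCLET_CALL_PROLOGUE (derived from the macro above):
+;; RhpCallCatchFunclet below uses "FUNCLET_CALL_PROLOGUE 3, 0", which gives
+;;
+;;     stack_alloc_size       = 20h + 3*8 + 0*8 + 0A0h = 0D8h
+;;     rsp_offsetof_locals    = 20h + 0A0h             = 0C0h
+;;     rsp_offsetof_arguments = 0D8h + 8*8h + 8h       = 120h
+;;
+;; i.e. rsp_offsetof_arguments skips the allocation, the eight pushed nonvolatile registers and the
+;; return address, so it addresses the caller-provided argument home slots used to spill RCX/RDX/R8/R9.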
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +;; ExInfo* pExInfo) +;; +;; INPUT: RCX: exception object +;; RDX: handler funclet address +;; R8: REGDISPLAY* +;; R9: ExInfo* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpCallCatchFunclet, _TEXT + + FUNCLET_CALL_PROLOGUE 3, 0 + + ;; locals + rsp_offsetof_thread = rsp_offsetof_locals + rsp_offsetof_resume_ip = rsp_offsetof_locals + 8; + rsp_offsetof_is_handling_thread_abort = rsp_offsetof_locals + 16; + + mov [rsp + rsp_offsetof_arguments + 0h], rcx ;; save arguments for later + mov [rsp + rsp_offsetof_arguments + 8h], rdx + mov [rsp + rsp_offsetof_arguments + 10h], r8 + mov [rsp + rsp_offsetof_arguments + 18h], r9 + + INLINE_GETTHREAD rax, rbx ;; rax <- Thread*, rbx is trashed + mov [rsp + rsp_offsetof_thread], rax ;; save Thread* for later + + cmp rcx, [rax + OFFSETOF__Thread__m_threadAbortException] + setz byte ptr [rsp + rsp_offsetof_is_handling_thread_abort] + + ;; Clear the DoNotTriggerGc state before calling out to our managed catch funclet. + lock and dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], NOT TSF_DoNotTriggerGc + + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRsi] + mov rsi, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRdi] + mov rdi, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + +if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the debugger + ;; trash the values at the old homes to make sure nobody uses them + mov r9, 0baaddeedh + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbx] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRsi] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRdi] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR12] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR13] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR14] + mov [rax], r9 + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR15] + mov [rax], r9 +endif + + movdqa xmm6, [r8 + OFFSETOF__REGDISPLAY__Xmm + 0*10h] + movdqa xmm7, [r8 + OFFSETOF__REGDISPLAY__Xmm + 1*10h] + movdqa xmm8, [r8 + OFFSETOF__REGDISPLAY__Xmm + 2*10h] + movdqa xmm9, [r8 + OFFSETOF__REGDISPLAY__Xmm + 3*10h] + movdqa xmm10,[r8 + OFFSETOF__REGDISPLAY__Xmm + 4*10h] + + movdqa xmm11,[r8 + OFFSETOF__REGDISPLAY__Xmm + 5*10h] + movdqa xmm12,[r8 + OFFSETOF__REGDISPLAY__Xmm + 6*10h] + movdqa xmm13,[r8 + OFFSETOF__REGDISPLAY__Xmm + 7*10h] + movdqa xmm14,[r8 + OFFSETOF__REGDISPLAY__Xmm + 8*10h] + movdqa xmm15,[r8 + OFFSETOF__REGDISPLAY__Xmm + 9*10h] + + mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame + mov rdx, [rsp + rsp_offsetof_arguments + 0h] ;; rdx <- exception object + call qword ptr [rsp + rsp_offsetof_arguments + 8h] ;; call handler funclet + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + mov r8, [rsp + rsp_offsetof_arguments + 10h] ;; r8 <- dispatch context + +ifdef _DEBUG + ;; Call into some C++ code to validate the pop of the 
ExInfo. We only do this in debug because we + ;; have to spill all the preserved registers and then refill them after the call. + mov [rsp + rsp_offsetof_resume_ip], rax ;; save resume IP for later + + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pRbx] + mov [rcx] , rbx + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pRbp] + mov [rcx] , rbp + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pRsi] + mov [rcx] , rsi + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pRdi] + mov [rcx] , rdi + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pR12] + mov [rcx] , r12 + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pR13] + mov [rcx] , r13 + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pR14] + mov [rcx] , r14 + mov rcx, [r8 + OFFSETOF__REGDISPLAY__pR15] + mov [rcx] , r15 + + mov rcx, [rsp + rsp_offsetof_thread] ;; rcx <- Thread* + mov rdx, [rsp + rsp_offsetof_arguments + 18h] ;; rdx <- current ExInfo * + mov r8, [r8 + OFFSETOF__REGDISPLAY__SP] ;; r8 <- resume SP value + call RhpValidateExInfoPop + + mov r8, [rsp + rsp_offsetof_arguments + 10h] ;; r8 <- dispatch context + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRsi] + mov rsi, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRdi] + mov rdi, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [r8 + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + + mov rax, [rsp + rsp_offsetof_resume_ip] ;; reload resume IP +endif + mov rdx, [rsp + rsp_offsetof_thread] ;; rdx <- Thread* + + ;; We must unhijack the thread at this point because the section of stack where the hijack is applied + ;; may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
+ INLINE_THREAD_UNHIJACK rdx, rcx, r9 ;; Thread in rdx, trashes rcx and r9 + + mov rcx, [rsp + rsp_offsetof_arguments + 18h] ;; rcx <- current ExInfo * + mov r8, [r8 + OFFSETOF__REGDISPLAY__SP] ;; r8 <- resume SP value + xor r9d, r9d ;; r9 <- 0 + + @@: mov rcx, [rcx + OFFSETOF__ExInfo__m_pPrevExInfo] ;; rcx <- next ExInfo + cmp rcx, r9 + je @F ;; we're done if it's null + cmp rcx, r8 + jl @B ;; keep looping if it's lower than the new SP + + @@: mov [rdx + OFFSETOF__Thread__m_pExInfoStackHead], rcx ;; store the new head on the Thread + + test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress + jz @f + + ;; test if the exception handled by the catch was the ThreadAbortException + cmp byte ptr [rsp + rsp_offsetof_is_handling_thread_abort], 0 + je @f + + ;; It was the ThreadAbortException, so rethrow it + mov rcx, STATUS_REDHAWK_THREAD_ABORT + mov rdx, rax ;; rdx <- continuation address as exception RIP + mov rsp, r8 ;; reset the SP to resume SP value + jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception + + ;; reset RSP and jump to the continuation address + @@: mov rsp, r8 ;; reset the SP to resume SP value + jmp rax + + +NESTED_END RhpCallCatchFunclet, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: RCX: handler funclet address +;; RDX: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpCallFinallyFunclet, _TEXT + + FUNCLET_CALL_PROLOGUE 1, 0 + + mov [rsp + rsp_offsetof_arguments + 0h], rcx ;; save arguments for later + mov [rsp + rsp_offsetof_arguments + 8h], rdx + + rsp_offsetof_thread = rsp_offsetof_locals + + INLINE_GETTHREAD rax, rbx ;; rax <- Thread*, rbx is trashed + mov [rsp + rsp_offsetof_thread], rax ;; save Thread* for later + + ;; + ;; We want to suppress hijacking between invocations of subsequent finallys. We do this because we + ;; cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the + ;; method) and then been popped off the stack, leaving behind no trace of its effect. + ;; + ;; So we clear the state before and set it after invocation of the handler. 
+ ;; + lock and dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], NOT TSF_DoNotTriggerGc + + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov rbx, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRsi] + mov rsi, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRdi] + mov rdi, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov r12, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov r13, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov r14, [rax] + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov r15, [rax] + + movdqa xmm6, [rdx + OFFSETOF__REGDISPLAY__Xmm + 0*10h] + movdqa xmm7, [rdx + OFFSETOF__REGDISPLAY__Xmm + 1*10h] + movdqa xmm8, [rdx + OFFSETOF__REGDISPLAY__Xmm + 2*10h] + movdqa xmm9, [rdx + OFFSETOF__REGDISPLAY__Xmm + 3*10h] + movdqa xmm10,[rdx + OFFSETOF__REGDISPLAY__Xmm + 4*10h] + + movdqa xmm11,[rdx + OFFSETOF__REGDISPLAY__Xmm + 5*10h] + movdqa xmm12,[rdx + OFFSETOF__REGDISPLAY__Xmm + 6*10h] + movdqa xmm13,[rdx + OFFSETOF__REGDISPLAY__Xmm + 7*10h] + movdqa xmm14,[rdx + OFFSETOF__REGDISPLAY__Xmm + 8*10h] + movdqa xmm15,[rdx + OFFSETOF__REGDISPLAY__Xmm + 9*10h] + +if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the debugger + ;; trash the values at the old homes to make sure nobody uses them + mov r9, 0baaddeedh + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRsi] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRdi] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov [rax], r9 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov [rax], r9 +endif + + mov rcx, [rdx + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame + call qword ptr [rsp + rsp_offsetof_arguments + 0h] ;; handler funclet address + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + mov rdx, [rsp + rsp_offsetof_arguments + 8h] ;; rdx <- regdisplay + + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] + mov [rax] , rbx + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] + mov [rax] , rbp + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRsi] + mov [rax] , rsi + mov rax, [rdx + OFFSETOF__REGDISPLAY__pRdi] + mov [rax] , rdi + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] + mov [rax] , r12 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] + mov [rax] , r13 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] + mov [rax] , r14 + mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] + mov [rax] , r15 + + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 0*10h], xmm6 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 1*10h], xmm7 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 2*10h], xmm8 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 3*10h], xmm9 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 4*10h], xmm10 + + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 5*10h], xmm11 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 6*10h], xmm12 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 7*10h], xmm13 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 8*10h], xmm14 + movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 9*10h], xmm15 + + mov rax, [rsp + rsp_offsetof_thread] ;; rax <- Thread* + lock or dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + + FUNCLET_CALL_EPILOGUE + + ret + +NESTED_END RhpCallFinallyFunclet, _TEXT + 
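+;;
+;; In rough pseudocode, the helper above implements (a sketch of the code above, not a separate
+;; implementation):
+;;
+;;     thread->m_ThreadStateFlags &= ~TSF_DoNotTriggerGc   ;; GC/hijacking allowed while the finally runs
+;;     load rbx/rbp/rsi/rdi/r12-r15 and xmm6-xmm15 from *pRegDisplay
+;;     pHandlerIP(pRegDisplay->SP)                         ;; RCX = establisher frame
+;;     store the (possibly updated) registers back into *pRegDisplay
+;;     thread->m_ThreadStateFlags |= TSF_DoNotTriggerGc    ;; suppressed again between subsequent finallys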
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: RCX: exception object +;; RDX: filter funclet address +;; R8: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpCallFilterFunclet, _TEXT + + FUNCLET_CALL_PROLOGUE 0, 1 + + mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] + mov rbp, [rax] + + mov rax, rdx ;; rax <- handler funclet address + mov rdx, rcx ;; rdx <- exception object + mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame + call rax + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + ;; RAX contains the result of the filter execution + + FUNCLET_CALL_EPILOGUE + + ret + +NESTED_END RhpCallFilterFunclet, _TEXT + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/FloatingPoint.asm b/src/coreclr/src/nativeaot/Runtime/amd64/FloatingPoint.asm new file mode 100644 index 0000000000000..8dcf12610a055 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/FloatingPoint.asm @@ -0,0 +1,57 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include asmmacros.inc + +LEAF_ENTRY RhpFltRemRev, _TEXT + + sub rsp, 18h + + movss dword ptr [rsp + 10h], xmm1 ; divisor + movss dword ptr [rsp + 8h], xmm0 ; dividend + + fld dword ptr [rsp + 10h] ; divisor + fld dword ptr [rsp + 8h] ; dividend + +fremloop: + fprem + fstsw ax + test ax, 0400h + jnz fremloop + + fstp dword ptr [rsp] + movlps xmm0,qword ptr [rsp] + + fstp st(0) + add rsp,18h + ret + +LEAF_END RhpFltRemRev, _TEXT + + +LEAF_ENTRY RhpDblRemRev, _TEXT + + sub rsp, 18h + + movsd qword ptr [rsp + 10h], xmm1 ; divisor + movsd qword ptr [rsp + 8h], xmm0 ; dividend + + fld qword ptr [rsp + 10h] ; divisor + fld qword ptr [rsp + 8h] ; dividend + +fremloopd: + fprem + fstsw ax + test ax, 0400h + jnz fremloopd + + fstp qword ptr [rsp] + movlpd xmm0,qword ptr [rsp] + + fstp st(0) + add rsp,18h + ret + +LEAF_END RhpDblRemRev, _TEXT + + END diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/GC.asm b/src/coreclr/src/nativeaot/Runtime/amd64/GC.asm new file mode 100644 index 0000000000000..5e4459ab205e3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/GC.asm @@ -0,0 +1,65 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +;; extern "C" DWORD getcpuid(DWORD arg, unsigned char result[16]); +NESTED_ENTRY getcpuid, _TEXT + + push_nonvol_reg rbx + push_nonvol_reg rsi + END_PROLOGUE + + mov eax, ecx ; first arg + mov rsi, rdx ; second arg (result) + xor ecx, ecx ; clear ecx - needed for "Structured Extended Feature Flags" + cpuid + mov [rsi+ 0], eax + mov [rsi+ 4], ebx + mov [rsi+ 8], ecx + mov [rsi+12], edx + pop rsi + pop rbx + ret +NESTED_END getcpuid, _TEXT + +;The following function uses Deterministic Cache Parameter leafs to crack the cache hierarchy information on Prescott & Above platforms. +; This function takes 3 arguments: +; Arg1 is an input to ECX. Used as index to specify which cache level to return information on by CPUID. +; Arg1 is already passed in ECX on call to getextcpuid, so no explicit assignment is required; +; Arg2 is an input to EAX. 
For deterministic code enumeration, we pass in 4H in arg2. +; Arg3 is a pointer to the return dwbuffer +NESTED_ENTRY getextcpuid, _TEXT + push_nonvol_reg rbx + push_nonvol_reg rsi + END_PROLOGUE + + mov eax, edx ; second arg (input to EAX) + mov rsi, r8 ; third arg (pointer to return dwbuffer) + cpuid + mov [rsi+ 0], eax + mov [rsi+ 4], ebx + mov [rsi+ 8], ecx + mov [rsi+12], edx + pop rsi + pop rbx + + ret +NESTED_END getextcpuid, _TEXT + +;; extern "C" DWORD __stdcall xmmYmmStateSupport(); +LEAF_ENTRY xmmYmmStateSupport, _TEXT + mov ecx, 0 ; Specify xcr0 + xgetbv ; result in EDX:EAX + and eax, 06H + cmp eax, 06H ; check OS has enabled both XMM and YMM state support + jne not_supported + mov eax, 1 + jmp done + not_supported: + mov eax, 0 + done: + ret +LEAF_END xmmYmmStateSupport, _TEXT + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/src/nativeaot/Runtime/amd64/GcProbe.asm new file mode 100644 index 0000000000000..31cd5a2539541 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/GcProbe.asm @@ -0,0 +1,810 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH +PROBE_SAVE_FLAGS_RAX_IS_GCREF equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF + +;; +;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX and accepts the register +;; bitmask in RCX +;; +;; On entry: +;; - BITMASK: bitmask describing pushes, may be volatile register or constant value +;; - RAX: managed function return value, may be an object or byref +;; - preserved regs: need to stay preserved, may contain objects or byrefs +;; - extraStack bytes of stack have already been allocated +;; +;; INVARIANTS +;; - The macro assumes it is called from a prolog, prior to a frame pointer being setup. +;; - All preserved registers remain unchanged from their values in managed code. +;; +PUSH_PROBE_FRAME macro threadReg, trashReg, extraStack, BITMASK + + push_vol_reg rax ; save RAX, it might contain an objectref + lea trashReg, [rsp + 10h + extraStack] + push_vol_reg trashReg ; save caller's RSP + push_nonvol_reg r15 ; save preserved registers + push_nonvol_reg r14 ; .. + push_nonvol_reg r13 ; .. + push_nonvol_reg r12 ; .. + push_nonvol_reg rdi ; .. + push_nonvol_reg rsi ; .. + push_nonvol_reg rbx ; .. + push_vol_reg BITMASK ; save the register bitmask passed in by caller + push_vol_reg threadReg ; Thread * (unused by stackwalker) + push_nonvol_reg rbp ; save caller's RBP + mov trashReg, [rsp + 12*8 + extraStack] ; Find the return address + push_vol_reg trashReg ; save m_RIP + lea trashReg, [rsp + 0] ; trashReg == address of frame + + ;; allocate scratch space and any required alignment + alloc_stack 20h + 10h + (extraStack AND (10h-1)) + + ;; save xmm0 in case it's being used as a return value + movdqa [rsp + 20h], xmm0 + + ; link the frame into the Thread + mov [threadReg + OFFSETOF__Thread__m_pHackPInvokeTunnel], trashReg +endm + +;; +;; Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved +;; registers and return value to their values from before the probe was called (while also updating any +;; object refs or byrefs). +;; +;; NOTE: does NOT deallocate the 'extraStack' portion of the stack, the user of this macro must do that. 
+;; +POP_PROBE_FRAME macro extraStack + movdqa xmm0, [rsp + 20h] + add rsp, 20h + 10h + (extraStack AND (10h-1)) + 8 + pop rbp + pop rax ; discard Thread* + pop rax ; discard BITMASK + pop rbx + pop rsi + pop rdi + pop r12 + pop r13 + pop r14 + pop r15 + pop rax ; discard caller RSP + pop rax +endm + +;; +;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this +;; thread if it finds it at an IP that isn't managed code. +;; +;; Register state on entry: +;; RDX: thread pointer +;; +;; Register state on exit: +;; RCX: trashed +;; +ClearHijackState macro + xor ecx, ecx + mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], rcx + mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], rcx +endm + + +;; +;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and +;; clears the hijack state. +;; +;; Register state on entry: +;; All registers correct for return to the original return address. +;; +;; Register state on exit: +;; RCX: trashed +;; RDX: thread pointer +;; +FixupHijackedCallstack macro + + ;; rdx <- GetThread(), TRASHES rcx + INLINE_GETTHREAD rdx, rcx + + ;; + ;; Fix the stack by pushing the original return address + ;; + mov rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress] + push rcx + + ClearHijackState +endm + +;; +;; Set the Thread state and wait for a GC to complete. +;; +;; Register state on entry: +;; RBX: thread pointer +;; +;; Register state on exit: +;; RBX: thread pointer +;; All other registers trashed +;; + +EXTERN RhpWaitForGCNoAbort : PROC + +WaitForGCCompletion macro + test dword ptr [rbx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc + jnz @F + + mov rcx, [rbx + OFFSETOF__Thread__m_pHackPInvokeTunnel] + call RhpWaitForGCNoAbort +@@: + +endm + + +EXTERN RhpPInvokeExceptionGuard : PROC + +;; +;; +;; +;; GC Probe Hijack targets +;; +;; +NESTED_ENTRY RhpGcProbeHijackScalar, _TEXT, RhpPInvokeExceptionGuard + END_PROLOGUE + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + jmp RhpGcProbe +NESTED_END RhpGcProbeHijackScalar, _TEXT + +NESTED_ENTRY RhpGcProbeHijackObject, _TEXT, RhpPInvokeExceptionGuard + END_PROLOGUE + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF + jmp RhpGcProbe +NESTED_END RhpGcProbeHijackObject, _TEXT + +NESTED_ENTRY RhpGcProbeHijackByref, _TEXT, RhpPInvokeExceptionGuard + END_PROLOGUE + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + jmp RhpGcProbe +NESTED_END RhpGcProbeHijackByref, _TEXT + +ifdef FEATURE_GC_STRESS +;; +;; +;; GC Stress Hijack targets +;; +;; +LEAF_ENTRY RhpGcStressHijackScalar, _TEXT + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + jmp RhpGcStressProbe +LEAF_END RhpGcStressHijackScalar, _TEXT + +LEAF_ENTRY RhpGcStressHijackObject, _TEXT + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF + jmp RhpGcStressProbe +LEAF_END RhpGcStressHijackObject, _TEXT + +LEAF_ENTRY RhpGcStressHijackByref, _TEXT + FixupHijackedCallstack + mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + jmp RhpGcStressProbe +LEAF_END RhpGcStressHijackByref, _TEXT + +;; +;; Worker for our GC stress probes. Do not call directly!! +;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. +;; This worker performs the GC Stress work and returns to the original return address. 
+;; +;; Register state on entry: +;; RDX: thread pointer +;; RCX: register bitmask +;; +;; Register state on exit: +;; Scratch registers, except for RAX, have been trashed +;; All other registers restored as they were when the hijack was first reached. +;; +NESTED_ENTRY RhpGcStressProbe, _TEXT + PUSH_PROBE_FRAME rdx, rax, 0, rcx + END_PROLOGUE + + call REDHAWKGCINTERFACE__STRESSGC + + POP_PROBE_FRAME 0 + ret +NESTED_END RhpGcStressProbe, _TEXT + +endif ;; FEATURE_GC_STRESS + +EXTERN RhpThrowHwEx : PROC + +NESTED_ENTRY RhpGcProbe, _TEXT + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz @f + ret +@@: + PUSH_PROBE_FRAME rdx, rax, 0, rcx + END_PROLOGUE + + mov rbx, rdx + WaitForGCCompletion + + mov rax, [rbx + OFFSETOF__Thread__m_pHackPInvokeTunnel] + test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT + jnz Abort + POP_PROBE_FRAME 0 + ret +Abort: + POP_PROBE_FRAME 0 + mov rcx, STATUS_REDHAWK_THREAD_ABORT + pop rdx ;; return address as exception RIP + jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception + +NESTED_END RhpGcProbe, _TEXT + + +ifdef FEATURE_GC_STRESS +;; PAL_LIMITED_CONTEXT, 6 xmm regs to save, 2 scratch regs to save, plus 20h bytes for scratch space +RhpHijackForGcStress_FrameSize equ SIZEOF__PAL_LIMITED_CONTEXT + 6*10h + 2*8h + 20h + +; ----------------------------------------------------------------------------------------------------------- +; RhpHijackForGcStress +; +; Called at the beginning of the epilog when a method is bound with /gcstress +; +; N.B. -- Leaf frames may not have aligned the stack or reserved any scratch space on the stack. Also, in +; order to have a resonable stacktrace in the debugger, we must use the .pushframe unwind directive. +; +; N.B. #2 -- The "EH jump epilog" codegen depends on rcx/rdx being preserved across this call. We currently +; will trash R8-R11, but we can do better, if necessary. +; +NESTED_ENTRY RhpHijackForGcStress, _TEXT + + lea r10, [rsp+8] ;; save the original RSP (prior to call) + mov r11, [rsp] ;; get the return address + + ;; Align the stack + and rsp, -16 + + ;; Push the expected "machine frame" for the unwinder to see. All that it looks at is the RSP and + ;; RIP, so we push zero for the others. + xor r8, r8 + push r8 ;; just aligning the stack + push r8 ;; SS + push r10 ;; original RSP + push r8 ;; EFLAGS + push r8 ;; CS + push r11 ;; return address + + ; Tell the unwinder that the frame is there now + .pushframe + + alloc_stack RhpHijackForGcStress_FrameSize + END_PROLOGUE + + ;; Save xmm scratch regs -- this is probably overkill, only the return value reg is + ;; likely to be interesting at this point, but it's a bit ambiguous. + movdqa [rsp + 20h + 0*10h], xmm0 + movdqa [rsp + 20h + 1*10h], xmm1 + movdqa [rsp + 20h + 2*10h], xmm2 + movdqa [rsp + 20h + 3*10h], xmm3 + movdqa [rsp + 20h + 4*10h], xmm4 + movdqa [rsp + 20h + 5*10h], xmm5 + + mov [rsp + 20h + 6*10h + 0*8h], rcx + mov [rsp + 20h + 6*10h + 1*8h], rdx + + ;; + ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the + ;; IP after the call to this helper. + ;; + ;; This is very likely overkill since the calculation of the return address should only need RSP and + ;; RBP, but this is test code, so I'm not too worried about efficiency. 
+ ;; + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__IP], r11 ; rip at callsite + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rsp], r10 ; rsp at callsite + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rbp], rbp + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rdi], rdi + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rsi], rsi + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rax], rax + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rbx], rbx + + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R12], r12 + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R13], r13 + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R14], r14 + mov [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R15], r15 + + lea rcx, [rsp + 20h + 6*10h + 2*8h] ;; address of PAL_LIMITED_CONTEXT + call THREAD__HIJACKFORGCSTRESS + + ;; Note: we only restore the scratch registers here. No GC has occured, so restoring + ;; the callee saved ones is unnecessary. + mov rax, [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rax] + mov rcx, [rsp + 20h + 6*10h + 0*8h] + mov rdx, [rsp + 20h + 6*10h + 1*8h] + + ;; Restore xmm scratch regs + movdqa xmm0, [rsp + 20h + 0*10h] + movdqa xmm1, [rsp + 20h + 1*10h] + movdqa xmm2, [rsp + 20h + 2*10h] + movdqa xmm3, [rsp + 20h + 3*10h] + movdqa xmm4, [rsp + 20h + 4*10h] + movdqa xmm5, [rsp + 20h + 5*10h] + + ;; epilog + mov r10, [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rsp] + lea rsp, [r10 - 8] ;; adjust RSP to point back at the return address + ret +NESTED_END RhpHijackForGcStress, _TEXT + +endif ;; FEATURE_GC_STRESS + + +;; +;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH +;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing +;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of +;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the +;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be +;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the +;; handler in the caller. +;; +;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to +;; complete. There are also variants for GC stress. +;; +;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to +;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack +;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. +;; +;; Register state on entry: +;; RAX: pointer to this function (i.e., trash) +;; RCX: reference to the exception object. +;; RDX: handler address we want to jump to. +;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. +;; The stack still contains the return address. +;; +;; Register state on exit: +;; RSP: what it would be after a complete return to the caler. 
+;; RDX: TRASHED +;; +RTU_EH_JUMP_HELPER macro funcName, hijackFuncName, isStress, stressFuncName +LEAF_ENTRY funcName, _TEXT + lea rax, [hijackFuncName] + cmp [rsp], rax + je RhpGCProbeForEHJump + +IF isStress EQ 1 + lea rax, [stressFuncName] + cmp [rsp], rax + je RhpGCStressProbeForEHJump +ENDIF + + ;; We are not hijacked, so we can return to the handler. + ;; We return to keep the call/return prediction balanced. + mov [rsp], rdx ; Update the return address + ret + +LEAF_END funcName, _TEXT +endm + +;; We need an instance of the helper for each possible hijack function. The binder has enough +;; information to determine which one we need to use for any function. +RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijackScalar, 0, 0 +RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijackObject, 0, 0 +RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijackByref, 0, 0 +ifdef FEATURE_GC_STRESS +RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijackScalar, 1, RhpGcStressHijackScalar +RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijackObject, 1, RhpGcStressHijackObject +RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijackByref, 1, RhpGcStressHijackByref +endif + +;; +;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs. +;; +;; Register state on entry: +;; RAX: scratch +;; RCX: reference to the exception object. +;; RDX: handler address we want to jump to. +;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. +;; The stack is as if we are just about to returned from the call +;; +;; Register state on exit: +;; RAX: reference to the exception object +;; RCX: scratch +;; RDX: thread pointer +;; +EHJumpProbeProlog_extraStack = 1*8 +EHJumpProbeProlog macro + push_nonvol_reg rdx ; save the handler address so we can jump to it later + mov rax, rcx ; move the ex object reference into rax so we can report it + + ;; rdx <- GetThread(), TRASHES rcx + INLINE_GETTHREAD rdx, rcx + + ;; Fix the stack by patching the original return address + mov rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress] + mov [rsp + EHJumpProbeProlog_extraStack], rcx + + ClearHijackState + + ; TRASHES r10 + PUSH_PROBE_FRAME rdx, r10, EHJumpProbeProlog_extraStack, PROBE_SAVE_FLAGS_RAX_IS_GCREF + + END_PROLOGUE +endm + +;; +;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the +;; final jump to the handler for EH jump probe funcs. +;; +;; Register state on entry: +;; RAX: reference to the exception object +;; RCX: scratch +;; RDX: scratch +;; +;; Register state on exit: +;; RSP: correct for return to the caller +;; RCX: reference to the exception object +;; RDX: trashed +;; +EHJumpProbeEpilog macro + POP_PROBE_FRAME EHJumpProbeProlog_extraStack + mov rcx, rax ; Put the EX obj ref back into rcx for the handler. + + pop rax ; Recover the handler address. + mov [rsp], rax ; Update the return address + ret +endm + +;; +;; We are hijacked for a normal GC (not GC stress), so we need to unhijcak and wait for the GC to complete. +;; +;; Register state on entry: +;; RAX: scratch +;; RCX: reference to the exception object. +;; RDX: handler address we want to jump to. +;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. +;; The stack is as if we have tail called to this function (rsp points to return address). 
+;; +;; Register state on exit: +;; RSP: correct for return to the caller +;; RBP: previous ebp frame +;; RCX: reference to the exception object +;; +NESTED_ENTRY RhpGCProbeForEHJump, _TEXT + EHJumpProbeProlog + +ifdef _DEBUG + ;; + ;; If we get here, then we have been hijacked for a real GC, and our SyncState must + ;; reflect that we've been requested to synchronize. + + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz @F + + call RhDebugBreak +@@: +endif ;; _DEBUG + + mov rbx, rdx + WaitForGCCompletion + + EHJumpProbeEpilog + +NESTED_END RhpGCProbeForEHJump, _TEXT + +ifdef FEATURE_GC_STRESS +;; +;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. +;; +;; Register state on entry: +;; RAX: scratch +;; RCX: reference to the exception object. +;; RDX: handler address we want to jump to. +;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. +;; The stack is as if we have tail called to this function (rsp points to return address). +;; +;; Register state on exit: +;; RSP: correct for return to the caller +;; RBP: previous ebp frame +;; RCX: reference to the exception object +;; +NESTED_ENTRY RhpGCStressProbeForEHJump, _TEXT + EHJumpProbeProlog + + call REDHAWKGCINTERFACE__STRESSGC + + EHJumpProbeEpilog + +NESTED_END RhpGCStressProbeForEHJump, _TEXT + +g_pTheRuntimeInstance equ ?g_pTheRuntimeInstance@@3PEAVRuntimeInstance@@EA +EXTERN g_pTheRuntimeInstance : QWORD +RuntimeInstance__ShouldHijackLoopForGcStress equ ?ShouldHijackLoopForGcStress@RuntimeInstance@@QEAA_N_K@Z +EXTERN RuntimeInstance__ShouldHijackLoopForGcStress : PROC + +endif ;; FEATURE_GC_STRESS + +EXTERN g_fGcStressStarted : DWORD +EXTERN g_fHasFastFxsave : BYTE + +FXSAVE_SIZE equ 512 + +;; Trap to GC. +;; Set up the P/Invoke transition frame with the return address as the safe point. +;; All registers, both volatile and non-volatile, are preserved. 
+;; The function should be called not jumped because it's expecting the return address +NESTED_ENTRY RhpTrapToGC, _TEXT + + sizeof_OutgoingScratchSpace equ 20h + sizeof_PInvokeFrame equ OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs + 15*8 + sizeof_XmmAlignPad equ 8 + sizeof_XmmSave equ FXSAVE_SIZE + sizeof_MachineFrame equ 6*8 + sizeof_InitialPushedArgs equ 2*8 ;; eflags, return value + sizeof_FixedFrame equ sizeof_OutgoingScratchSpace + sizeof_PInvokeFrame + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + + ;; On the stack on entry: + ;; [rsp ] -> Return address + + ;; save eflags before we trash them + pushfq + + ;; What we want to get to: + ;; + ;; [rsp ] -> outgoing scratch area + ;; + ;; [rsp + 20] -> m_RIP -------| + ;; [rsp + 28] -> m_FramePointer | + ;; [rsp + 30] -> m_pThread | + ;; [rsp + 38] -> m_Flags / m_dwAlignPad2 | + ;; [rsp + 40] -> rbx save | + ;; [rsp + 48] -> rsi save | + ;; [rsp + 50] -> rdi save | + ;; [rsp + 58] -> r12 save | + ;; [rsp + 60] -> r13 save | + ;; [rsp + 68] -> r14 save | PInvokeTransitionFrame + ;; [rsp + 70] -> r15 save | + ;; [rsp + 78] -> rsp save | + ;; [rsp + 80] -> rax save | + ;; [rsp + 88] -> rcx save | + ;; [rsp + 90] -> rdx save | + ;; [rsp + 98] -> r8 save | + ;; [rsp + a0] -> r9 save | + ;; [rsp + a8] -> r10 save | + ;; [rsp + b0] -> r11 save -------| + ;; + ;; [rsp + b8] -> [XmmAlignPad] + ;; + ;; [rsp + c0] -> FXSAVE area + ;; + ;; [rsp +2c0] | RIP | + ;; [rsp +2c8] | CS | + ;; [rsp +2d0] | EFLAGS | <-- 'machine frame' + ;; [rsp +2d8] | RSP | + ;; [rsp +2e0] | SS | + ;; [rsp +2e8] | padding | + ;; + ;; [rsp +2f0] [PSP] + ;; [rsp +2f8] [optional stack alignment] + ;; + ;; [PSP - 10] -> eflags save + ;; [PSP - 8] -> Return address + ;; [PSP] -> caller's frame + + test rsp, 0Fh + jz AlreadyAligned + + sub rsp, sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 8 ; +8 to save PSP, + push r11 ; save incoming R11 into save location + lea r11, [rsp + 8 + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 8 + sizeof_InitialPushedArgs] + jmp PspCalculated + + AlreadyAligned: + + sub rsp, sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 16 ; +8 to save RSP, +8 to re-align PSP, + push r11 ; save incoming R11 into save location + lea r11, [rsp + 8 + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 16 + sizeof_InitialPushedArgs] + + PspCalculated: + push r10 ; save incoming R10 into save location + xor r10d, r10d + + ;; + ;; Populate the 'machine frame' in the diagram above. We have only pushed up to the 'r10 save', so we have not + ;; yet pushed 0xA8 bytes of that diagram. 
+ ;; + ;; [rsp + {offset-in-target-frame-layout-diagram} - {as-yet-unpushed-stack-size}] + mov [rsp + 2c0h - 0a8h], r10 ; init RIP to zero + mov [rsp + 2c8h - 0a8h], r10 ; init CS to zero + mov [rsp + 2d0h - 0a8h], r10 ; init EFLAGS to zero + mov [rsp + 2d8h - 0a8h], r11 ; save PSP in the 'machine frame' + mov [rsp + 2e0h - 0a8h], r10 ; init SS to zero + mov [rsp + 2f0h - 0a8h], r11 ; save PSP + + .pushframe + .allocstack sizeof_XmmAlignPad + sizeof_XmmSave + 2*8 ;; only 2 of the regs from the PInvokeTransitionFrame are on the stack + + push_vol_reg r9 + push_vol_reg r8 + push_vol_reg rdx + push_vol_reg rcx + push_vol_reg rax + push_vol_reg r11 ; PSP gets saved into the PInvokeTransitionFrame + push_nonvol_reg r15 + push_nonvol_reg r14 + push_nonvol_reg r13 + push_nonvol_reg r12 + push_nonvol_reg rdi + push_nonvol_reg rsi + push_nonvol_reg rbx + push_vol_reg PROBE_SAVE_FLAGS_EVERYTHING ; m_Flags / m_dwAlignPad2 + + ;; rdx <- GetThread(), TRASHES rcx + INLINE_GETTHREAD rdx, rcx + + push_vol_reg rdx ; m_pThread + push_nonvol_reg rbp ; m_FramePointer + push_vol_reg r10 ; m_RIP + + alloc_stack sizeof_OutgoingScratchSpace + END_PROLOGUE + + mov rbx, r11 ; put PSP into RBX + mov rsi, rdx ; put Thread* into RSI + + ; RBX is PSP + ; RSI is Thread* + + fxsave [rsp + 0c0h] + + cmp [g_fHasFastFxsave], 0 ; fast fxsave won't save the xmm registers, so we must do it + jz DontSaveXmmAgain + + ;; 0C0h -> offset of FXSAVE area + ;; 0A0h -> offset of xmm0 save area within the FXSAVE area + movdqa [rsp + 0c0h + 0a0h + 0*10h], xmm0 + movdqa [rsp + 0c0h + 0a0h + 1*10h], xmm1 + movdqa [rsp + 0c0h + 0a0h + 2*10h], xmm2 + movdqa [rsp + 0c0h + 0a0h + 3*10h], xmm3 + movdqa [rsp + 0c0h + 0a0h + 4*10h], xmm4 + movdqa [rsp + 0c0h + 0a0h + 5*10h], xmm5 + movdqa [rsp + 0c0h + 0a0h + 6*10h], xmm6 + movdqa [rsp + 0c0h + 0a0h + 7*10h], xmm7 + movdqa [rsp + 0c0h + 0a0h + 8*10h], xmm8 + movdqa [rsp + 0c0h + 0a0h + 9*10h], xmm9 + movdqa [rsp + 0c0h + 0a0h + 10*10h], xmm10 + movdqa [rsp + 0c0h + 0a0h + 11*10h], xmm11 + movdqa [rsp + 0c0h + 0a0h + 12*10h], xmm12 + movdqa [rsp + 0c0h + 0a0h + 13*10h], xmm13 + movdqa [rsp + 0c0h + 0a0h + 14*10h], xmm14 + movdqa [rsp + 0c0h + 0a0h + 15*10h], xmm15 + +DontSaveXmmAgain: + mov rax, [rbx - 8] + mov [rsp + 2c0h], rax ; save return address into 'machine frame' + mov [rsp + 20h], rax ; save return address into PInvokeTransitionFrame + + ; Early out if GC stress is currently suppressed. Do this after we have computed the real address to + ; return to but before we link the transition frame onto m_pHackPInvokeTunnel (because hitting this + ; condition implies we're running restricted callouts during a GC itself and we could end up + ; overwriting a co-op frame set by the code that caused the GC in the first place, e.g. a GC.Collect + ; call). + test dword ptr [rsi + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc + jnz DoneWaitingForGc + + ; link the frame into the Thread + lea rcx, [rsp + sizeof_OutgoingScratchSpace] ; rcx <- PInvokeTransitionFrame* + mov [rsi + OFFSETOF__Thread__m_pHackPInvokeTunnel], rcx + + ;; + ;; Unhijack this thread, if necessary. 
+ ;; + INLINE_THREAD_UNHIJACK rsi, rax, rcx ;; trashes RAX, RCX + +ifdef FEATURE_GC_STRESS + xor eax, eax + cmp [g_fGcStressStarted], eax + jz @F + + mov rdx, [rsp + 2c0h] + mov rcx, [g_pTheRuntimeInstance] + call RuntimeInstance__ShouldHijackLoopForGcStress + cmp al, 0 + je @F + + call REDHAWKGCINTERFACE__STRESSGC +@@: +endif ;; FEATURE_GC_STRESS + + lea rcx, [rsp + sizeof_OutgoingScratchSpace] ; calculate PInvokeTransitionFrame pointer + call RhpWaitForGCNoAbort + + DoneWaitingForGc: + + fxrstor [rsp + 0c0h] + + cmp [g_fHasFastFxsave], 0 + jz DontRestoreXmmAgain + + movdqa xmm0 , [rsp + 0c0h + 0a0h + 0*10h] + movdqa xmm1 , [rsp + 0c0h + 0a0h + 1*10h] + movdqa xmm2 , [rsp + 0c0h + 0a0h + 2*10h] + movdqa xmm3 , [rsp + 0c0h + 0a0h + 3*10h] + movdqa xmm4 , [rsp + 0c0h + 0a0h + 4*10h] + movdqa xmm5 , [rsp + 0c0h + 0a0h + 5*10h] + movdqa xmm6 , [rsp + 0c0h + 0a0h + 6*10h] + movdqa xmm7 , [rsp + 0c0h + 0a0h + 7*10h] + movdqa xmm8 , [rsp + 0c0h + 0a0h + 8*10h] + movdqa xmm9 , [rsp + 0c0h + 0a0h + 9*10h] + movdqa xmm10, [rsp + 0c0h + 0a0h + 10*10h] + movdqa xmm11, [rsp + 0c0h + 0a0h + 11*10h] + movdqa xmm12, [rsp + 0c0h + 0a0h + 12*10h] + movdqa xmm13, [rsp + 0c0h + 0a0h + 13*10h] + movdqa xmm14, [rsp + 0c0h + 0a0h + 14*10h] + movdqa xmm15, [rsp + 0c0h + 0a0h + 15*10h] + +DontRestoreXmmAgain: + add rsp, sizeof_OutgoingScratchSpace + mov eax, [rsp + OFFSETOF__PInvokeTransitionFrame__m_Flags] + test eax, PTFF_THREAD_ABORT + pop rax ; m_RIP + pop rbp ; m_FramePointer + pop rax ; m_pThread + pop rax ; m_Flags / m_dwAlign2 + pop rbx + pop rsi + pop rdi + pop r12 + pop r13 + pop r14 + pop r15 + pop rax ; RSP + pop rax ; RAX save + pop rcx + pop rdx + pop r8 + pop r9 + pop r10 + pop r11 + + ;; restore PSP + ;; 2F0h -> offset of the PSP area + ;; 0B8h -> offset of the end of the integer register area which is already popped + mov rsp, [rsp + 2f0h - 0b8h] + + ;; RSP is PSP at this point and the stack looks like this: + ;; [PSP - 10] -> eflags save + ;; [PSP - 8] -> return address + ;; [PSP] -> caller's frame + ;; + ;; The final step is to restore eflags and return + + lea rsp, [rsp - 10h] + jz @f ;; result of the test instruction before the pops above + popfq ;; restore flags + mov rcx, STATUS_REDHAWK_THREAD_ABORT + pop rdx ;; return address as exception RIP + jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception + +@@: + popfq ;; restore flags + ret + +NESTED_END RhpTrapToGC, _TEXT + +ifdef FEATURE_GC_STRESS +;; +;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. +;; +LEAF_ENTRY RhpSuppressGcStress, _TEXT + + INLINE_GETTHREAD rax, r10 + lock or dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + ret + +LEAF_END RhpSuppressGcStress, _TEXT +endif ;; FEATURE_GC_STRESS + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/GetThread.asm b/src/coreclr/src/nativeaot/Runtime/amd64/GetThread.asm new file mode 100644 index 0000000000000..409ba3dafd99d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/GetThread.asm @@ -0,0 +1,27 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
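Editor's note: conceptually, the RhpGetThread helper defined in this file just hands back the current thread's Thread* from thread-local storage; it is written in assembly because of its unusual contract (argument registers must be preserved and only R10 may be trashed), which compiled C++ cannot guarantee. A hedged sketch of the observable behavior, with an illustrative thread-local name:

#include <cstdint>

class Thread;                                   // runtime-internal type

extern thread_local Thread* t_pCurrentThread;   // illustrative; the real storage is runtime-defined

// What a caller observes from RhpGetThread: the current Thread* returned in RAX.
extern "C" Thread* GetThreadSketch()
{
    return t_pCurrentThread;
}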
+ +include asmmacros.inc + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpGetThread +;; +;; +;; INPUT: +;; +;; OUTPUT: RAX: Thread pointer +;; +;; TRASHES: R10 +;; +;; MUST PRESERVE ARGUMENT REGISTERS +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +LEAF_ENTRY RhpGetThread, _TEXT + ;; rax = GetThread(), TRASHES r10 + INLINE_GETTHREAD rax, r10 + ret +LEAF_END RhpGetThread, _TEXT + + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.S b/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.S new file mode 100644 index 0000000000000..bfb577365276e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.S @@ -0,0 +1,25 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg32AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpLockCmpXchg32, _TEXT + mov rax, rdx +ALTERNATE_ENTRY RhpLockCmpXchg32AVLocation + lock cmpxchg [rdi], esi + ret +LEAF_END RhpLockCmpXchg32, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg64AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpLockCmpXchg64, _TEXT + mov rax, rdx +ALTERNATE_ENTRY RhpLockCmpXchg64AVLocation + lock cmpxchg [rdi], rsi + ret +LEAF_END RhpLockCmpXchg64, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.asm b/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.asm new file mode 100644 index 0000000000000..f7b9bd1be7772 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/Interlocked.asm @@ -0,0 +1,26 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
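Editor's note: the Unix helper above and the Windows helper below implement the same interlocked compare-exchange contract. Judging from the register usage, the argument order appears to be (destination, value, comparand), with the original contents of the destination returned in RAX. A portable C++ sketch of that contract (function names are illustrative):

#include <cstdint>

// Atomically: if (*dest == comparand) *dest = value; always return the prior
// value of *dest, matching the RAX result of lock cmpxchg.
extern "C" int32_t LockCmpXchg32Sketch(int32_t* dest, int32_t value, int32_t comparand)
{
    return __sync_val_compare_and_swap(dest, comparand, value);
}

extern "C" int64_t LockCmpXchg64Sketch(int64_t* dest, int64_t value, int64_t comparand)
{
    return __sync_val_compare_and_swap(dest, comparand, value);
}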
+ +include AsmMacros.inc + +;; WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg32AVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpLockCmpXchg32, _TEXT + mov rax, r8 +ALTERNATE_ENTRY RhpLockCmpXchg32AVLocation + lock cmpxchg [rcx], edx + ret +LEAF_END RhpLockCmpXchg32, _TEXT + +;; WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg64AVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpLockCmpXchg64, _TEXT + mov rax, r8 +ALTERNATE_ENTRY RhpLockCmpXchg64AVLocation + lock cmpxchg [rcx], rdx + ret +LEAF_END RhpLockCmpXchg64, _TEXT + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.S b/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.S new file mode 100644 index 0000000000000..66454d5466fa2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.S @@ -0,0 +1,45 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include +#define POINTER_SIZE 8 + +LEAF_ENTRY RhCommonStub, _TEXT + + PUSH_ARGUMENT_REGISTERS + push_register r10 + + alloc_stack SIZEOF_FP_REGS + SAVE_FLOAT_ARGUMENT_REGISTERS 0 + + INLINE_GET_TLS_VAR tls_thunkData + + RESTORE_FLOAT_ARGUMENT_REGISTERS 0 + free_stack SIZEOF_FP_REGS + + pop_register r10 + POP_ARGUMENT_REGISTERS + + mov r11, [r10] + mov qword ptr [rax], r11 + + mov rax, [r10 + POINTER_SIZE] + jmp rax +LEAF_END RhCommonStub, _TEXT + + +LEAF_ENTRY RhGetCommonStubAddress, _TEXT + lea rax, [rip + C_FUNC(RhCommonStub)] + ret +LEAF_END RhGetCommonStubAddress, _TEXT + + +LEAF_ENTRY RhGetCurrentThunkContext, _TEXT + + INLINE_GET_TLS_VAR tls_thunkData + + mov rax, qword ptr [rax] + ret +LEAF_END RhGetCurrentThunkContext, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.asm b/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.asm new file mode 100644 index 0000000000000..e1107717d4c70 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/InteropThunksHelpers.asm @@ -0,0 +1,97 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
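Editor's note: the Unix stub above and the Windows stub below share one scheme: R10 points at a two-pointer data block owned by the thunk, the first pointer (the context) is stashed in a per-thread slot, and control tail-jumps to the second pointer (the target), which can later fetch the context via RhGetCurrentThunkContext. A C++ sketch of that data flow; the struct and variable names are assumptions, and the real stub must stay in assembly because it has to leave every argument register untouched.

#include <cstdint>

struct ThunkDataSketch        // hypothetical layout of the thunk's data block
{
    void* context;            // e.g. delegate or open-static-delegate cell
    void* target;             // code the stub tail-calls
};

thread_local void* t_thunkData;     // stands in for ThunkParamSlot / tls_thunkData

using ThunkTarget = void (*)();

void CommonStubSketch(ThunkDataSketch* data)        // data plays the role of R10
{
    t_thunkData = data->context;                    // publish context for the target
    reinterpret_cast<ThunkTarget>(data->target)();  // "jmp" to the real entry point
}

void* GetCurrentThunkContextSketch()
{
    return t_thunkData;                             // what RhGetCurrentThunkContext returns
}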
+ + +;; ----------------------------------------------------------------------------------------------------------- +;;#include "asmmacros.inc" +;; ----------------------------------------------------------------------------------------------------------- + +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + align 16 + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + +; - TAILCALL_RAX: ("jmp rax") should be used for tailcalls, this emits an instruction +; sequence which is recognized by the unwinder as a valid epilogue terminator +TAILJMP_RAX TEXTEQU + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +_tls_array equ 58h ;; offsetof(TEB, ThreadLocalStoragePointer) + +POINTER_SIZE equ 08h + +;; TLS variables +_TLS SEGMENT ALIAS(".tls$") + ThunkParamSlot DQ 0000000000000000H +_TLS ENDS + +EXTRN _tls_index:DWORD + + +;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; RhCommonStub +;; +LEAF_ENTRY RhCommonStub, _TEXT + ;; There are arbitrary callers passing arguments with arbitrary signatures. + ;; Custom calling convention: + ;; r10: pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + + ;; Save context data into the ThunkParamSlot thread-local variable + ;; A pointer to the delegate and function pointer for open static delegate should have been saved in the thunk's context cell during thunk allocation + mov [rsp + 8], rcx ;; Save rcx in a home scratch location. Pushing the + ;; register on the stack will break callstack unwind + mov ecx, [_tls_index] + mov r11, gs:[_tls_array] + mov rax, [r11 + rcx * POINTER_SIZE] + + ;; rax = base address of TLS data + ;; r10 = address of context cell in thunk's data + ;; r11 = trashed + + ;; store thunk address in thread static + mov r11, [r10] + mov ecx, SECTIONREL ThunkParamSlot + mov [rax + rcx], r11 ;; ThunkParamSlot <- context slot data + + mov rcx, [rsp + 8] ;; Restore rcx + + ;; jump to the target + mov rax, [r10 + POINTER_SIZE] + TAILJMP_RAX +LEAF_END RhCommonStub, _TEXT + + +;; +;; IntPtr RhGetCommonStubAddress() +;; +LEAF_ENTRY RhGetCommonStubAddress, _TEXT + lea rax, [RhCommonStub] + ret +LEAF_END RhGetCommonStubAddress, _TEXT + + +;; +;; IntPtr RhGetCurrentThunkContext() +;; +LEAF_ENTRY RhGetCurrentThunkContext, _TEXT + mov r10d, [_tls_index] + mov r11, gs:[_tls_array] + mov r10, [r11 + r10 * POINTER_SIZE] + mov r8d, SECTIONREL ThunkParamSlot + mov rax, [r10 + r8] ;; rax <- ThunkParamSlot + ret +LEAF_END RhGetCurrentThunkContext, _TEXT + + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/MemClrForGC.asm b/src/coreclr/src/nativeaot/Runtime/amd64/MemClrForGC.asm new file mode 100644 index 0000000000000..047467aa7ed4a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/MemClrForGC.asm @@ -0,0 +1,99 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
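Editor's note: the memclr_for_gc routine defined in this file clears roughly the first 8 KB with ordinary rep stos stores and everything past that with non-temporal movnti stores, so a very large clear does not flush the rest of the cache. A rough C++ rendering of that strategy using intrinsics follows; the 8 KB threshold comes from the assembly, while the function name and the trailing sfence are additions of this sketch.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <emmintrin.h>      // _mm_stream_si64 (movnti), _mm_sfence

void MemClrForGcSketch(void* dest, size_t size)
{
    uint8_t* p = static_cast<uint8_t*>(dest);
    const size_t temporalLimit = 8 * 1024;

    // Bytes beyond the first 8 KB, rounded down to whole 64-byte blocks, are
    // cleared non-temporally; everything else goes through normal stores.
    size_t ntBytes = size > temporalLimit
                         ? ((size - temporalLimit) & ~static_cast<size_t>(63))
                         : 0;
    size_t temporalBytes = size - ntBytes;

    std::memset(p, 0, temporalBytes);               // "rep stosq / rep stosb" portion

    for (size_t i = 0; i < ntBytes; i += 8)         // "movnti" loop
        _mm_stream_si64(reinterpret_cast<long long*>(p + temporalBytes + i), 0);

    _mm_sfence();   // not in the assembly; added so the sketch's streaming stores are ordered
}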
+ +include AsmMacros.inc + + +LEAF_ENTRY memclr_for_gc, _TEXT + +; x64 version + +; we get the following parameters +; rcx = destination address +; rdx = size to clear + + ; save rdi - this should be faster than a push + mov r11,rdi + + xor eax, eax + + ; check alignment of destination + test cl,7 + jnz alignDest +alignDone: + ; now destination is qword aligned + ; move it to rdi for rep stos + mov rdi,rcx + + ; compute number of bytes to clear non-temporally + ; we wish to clear the first 8k or so with rep stos, + ; anything above that non-temporally + + xor r8,r8 + cmp rdx,8*1024 + jbe noNonTempClear + + ; compute the number of bytes above 8k + ; and round down to a multiple of 64 + mov r8,rdx + sub r8,8*1024 + and r8,not 63 + + ; compute remaining size to clear temporally + sub rdx,r8 + +noNonTempClear: + + ; do the temporal clear + mov rcx,rdx + shr rcx,3 + rep stosq + + ; do the non-temporal clear + test r8,r8 + jne nonTempClearLoop + +nonTempClearDone: + + ; clear any remaining bytes + mov rcx,rdx + and rcx,7 + rep stosb + + ; restore rdi + mov rdi,r11 + + ret + + ; this is the infrequent case, hence out of line +nonTempClearLoop: + movnti [rdi+ 0],rax + movnti [rdi+ 8],rax + movnti [rdi+16],rax + movnti [rdi+24],rax + + movnti [rdi+32],rax + movnti [rdi+40],rax + movnti [rdi+48],rax + movnti [rdi+56],rax + + add rdi,64 + sub r8,64 + ja nonTempClearLoop + jmp nonTempClearDone + +alignDest: + test rdx,rdx + je alignDone +alignLoop: + mov [rcx],al + add rcx,1 + sub rdx,1 + jz alignDone + test cl,7 + jnz alignLoop + jmp alignDone + +LEAF_END memclr_for_gc, _TEXT + + end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.S b/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.S new file mode 100644 index 0000000000000..a335b997459d3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.S @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +// The following helper will access ("probe") a word on each page of the stack +// starting with the page right beneath rsp down to the one pointed to by r11. +// The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. +// The call to the helper will be emitted by JIT in the function/funclet prolog when large (larger than 0x3000 bytes) stack frame is required. +// +// NOTE: On Linux we must advance the stack pointer as we probe - it is not allowed to access 65535 bytes below rsp. +// Since this helper will modify a value of rsp - it must establish the frame pointer. +// +// See also https://github.com/dotnet/runtime/issues/9899 for more information. + +#define PAGE_SIZE 0x1000 + +NESTED_ENTRY RhpStackProbe, _TEXT, NoHandler + // On entry: + // r11 - points to the lowest address on the stack frame being allocated (i.e. [InitialSp - FrameSize]) + // rsp - points to some byte on the last probed page + // On exit: + // r11 - is preserved + // + // NOTE: this helper will probe at least one page below the one pointed by rsp. + + push_nonvol_reg rbp + mov rbp, rsp + set_cfa_register rbp, 16 + + END_PROLOGUE + + and rsp, -PAGE_SIZE // rsp points to the **lowest address** on the last probed page + // This is done to make the following loop end condition simpler. 
+ +LOCAL_LABEL(ProbeLoop): + sub rsp, PAGE_SIZE // rsp points to the lowest address of the **next page** to probe + test dword ptr [rsp], eax // rsp points to the lowest address on the **last probed** page + cmp rsp, r11 + jg LOCAL_LABEL(ProbeLoop) // if (rsp > r11), then we need to probe at least one more page. + + RESET_FRAME_WITH_RBP + ret + +NESTED_END RhpStackProbe, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.asm b/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.asm new file mode 100644 index 0000000000000..16e00fb53f944 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/MiscStubs.asm @@ -0,0 +1,276 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +EXTERN GetClasslibCCtorCheck : PROC +EXTERN memcpy : PROC +EXTERN memcpyGCRefs : PROC +EXTERN memcpyGCRefsWithWriteBarrier : PROC +EXTERN memcpyAnyWithWriteBarrier : PROC + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; rax : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers and the condition codes may be trashed. +;; +LEAF_ENTRY RhpCheckCctor, _TEXT + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + cmp dword ptr [rax + OFFSETOF__StaticClassConstructionContext__m_initialized], 1 + jne RhpCheckCctor__SlowPath + ret +RhpCheckCctor__SlowPath: + mov rdx, rax + jmp RhpCheckCctor2 ; Tail-call the check cctor helper that can actually call the cctor +LEAF_END RhpCheckCctor, _TEXT + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; rax : Value that must be preserved in this register across the cctor check. +;; rdx : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than rax may be trashed and the condition codes may also be trashed. +;; +LEAF_ENTRY RhpCheckCctor2, _TEXT + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + cmp dword ptr [rdx + OFFSETOF__StaticClassConstructionContext__m_initialized], 1 + jne RhpCheckCctor2__SlowPath + ret + +LEAF_END RhpCheckCctor2, _TEXT + +;; +;; Slow path helper for RhpCheckCctor2. +;; +;; Input: +;; rax : Value that must be preserved in this register across the cctor check. +;; rdx : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than rax may be trashed and the condition codes may also be trashed. 
+;; +NESTED_ENTRY RhpCheckCctor2__SlowPath, _TEXT + +RhpCheckCctor2__SlowPath_FrameSize equ 20h + 10h + 8h ;; Scratch space + storage to save off rax/rdx value + align stack + + alloc_stack RhpCheckCctor2__SlowPath_FrameSize + save_reg_postrsp rdx, 20h + save_reg_postrsp rax, 28h + + END_PROLOGUE + + ;; Call a C++ helper to retrieve the address of the classlib callback. + + ;; The caller's return address is passed as the argument to the helper; it's an address in the module + ;; and is used by the helper to locate the classlib. + mov rcx, [rsp + RhpCheckCctor2__SlowPath_FrameSize] + + call GetClasslibCCtorCheck + + ;; Rax now contains the address of the classlib method to call. The single argument is the context + ;; structure address currently in stashed on the stack. Clean up and tail call to the classlib + ;; callback so we're not on the stack should a GC occur (so we don't need to worry about transition + ;; frames). + mov rdx, [rsp + 20h] + mov rcx, [rsp + 28h] + add rsp, RhpCheckCctor2__SlowPath_FrameSize + ;; Tail-call the classlib cctor check function. Note that the incoming rax value is moved to rcx + ;; and the classlib cctor check function is required to return that value, so that rax is preserved + ;; across a RhpCheckCctor call. + TAILJMP_RAX + +NESTED_END RhpCheckCctor2__SlowPath, _TEXT + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteNoGCRefs(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; +LEAF_ENTRY RhpCopyMultibyteNoGCRefs, _TEXT + + ; rcx dest + ; rdx src + ; r8 count + + test r8, r8 ; check for a zero-length copy + jz NothingToCopy + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. +ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation + cmp byte ptr [rcx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation + cmp byte ptr [rdx], 0 + + ; tail-call to plain-old-memcpy + jmp memcpy + +NothingToCopy: + mov rax, rcx ; return dest + ret + +LEAF_END RhpCopyMultibyteNoGCRefs, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyte(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; +LEAF_ENTRY RhpCopyMultibyte, _TEXT + + ; rcx dest + ; rdx src + ; r8 count + + test r8, r8 ; check for a zero-length copy + jz NothingToCopy + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. 
+ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation + cmp byte ptr [rcx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation + cmp byte ptr [rdx], 0 + + ; tail-call to the GC-safe memcpy implementation + jmp memcpyGCRefs + +NothingToCopy: + mov rax, rcx ; return dest + ret + +LEAF_END RhpCopyMultibyte, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy +;; +LEAF_ENTRY RhpCopyMultibyteWithWriteBarrier, _TEXT + + ; rcx dest + ; rdx src + ; r8 count + + test r8, r8 ; check for a zero-length copy + jz NothingToCopy + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. +ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation + cmp byte ptr [rcx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation + cmp byte ptr [rdx], 0 + + ; tail-call to the GC-safe memcpy implementation + jmp memcpyGCRefsWithWriteBarrier + +NothingToCopy: + mov rax, rcx ; return dest + ret + +LEAF_END RhpCopyMultibyteWithWriteBarrier, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyAnyWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy if the copy may contain GC pointers +;; +LEAF_ENTRY RhpCopyAnyWithWriteBarrier, _TEXT + + ; rcx dest + ; rdx src + ; r8 count + + test r8, r8 ; check for a zero-length copy + jz NothingToCopy + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. +ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation + cmp byte ptr [rcx], 0 +ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation + cmp byte ptr [rdx], 0 + + ; tail-call to the GC-safe memcpy implementation + jmp memcpyAnyWithWriteBarrier + +NothingToCopy: + mov rax, rcx ; return dest + ret + +LEAF_END RhpCopyAnyWithWriteBarrier, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; The following helper will access ("probe") a word on each page of the stack +; starting with the page right beneath rsp down to the one pointed to by r11. +; The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. +; The call to the helper will be emitted by JIT in the function/funclet prolog when large (larger than 0x3000 bytes) stack frame is required. 
+; +; NOTE: this helper will NOT modify a value of rsp and can be defined as a leaf function. + +PAGE_SIZE equ 1000h + +LEAF_ENTRY RhpStackProbe, _TEXT + ; On entry: + ; r11 - points to the lowest address on the stack frame being allocated (i.e. [InitialSp - FrameSize]) + ; rsp - points to some byte on the last probed page + ; On exit: + ; rax - is not preserved + ; r11 - is preserved + ; + ; NOTE: this helper will probe at least one page below the one pointed by rsp. + + mov rax, rsp ; rax points to some byte on the last probed page + and rax, -PAGE_SIZE ; rax points to the **lowest address** on the last probed page + ; This is done to make the following loop end condition simpler. + +ProbeLoop: + sub rax, PAGE_SIZE ; rax points to the lowest address of the **next page** to probe + test dword ptr [rax], eax ; rax points to the lowest address on the **last probed** page + cmp rax, r11 + jg ProbeLoop ; If (rax > r11), then we need to probe at least one more page. + + ret + +LEAF_END RhpStackProbe, _TEXT + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.S b/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.S new file mode 100644 index 0000000000000..0048119214508 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.S @@ -0,0 +1,58 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +// +// RhpPInvoke +// +// IN: RDI: address of pinvoke frame +// +// This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite. +// The codegenerator must treat the callsite of this helper as GC triggering and generate the GC info for it. +// Also, the codegenerator must ensure that there are no live GC references in callee saved registers. 
+// +NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler + push_nonvol_reg rbx + mov rbx, rdi + + // RAX = GetThread() + INLINE_GETTHREAD + + mov r11, [rsp + 0x8] // r11 <- return address + mov qword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_pThread], rax + mov qword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_FramePointer], rbp + mov qword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_RIP], r11 + + lea r11, [rsp + 0x10] // r11 <- caller SP + mov dword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_SAVE_RSP + mov qword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs], r11 + + mov qword ptr [rax + OFFSETOF__Thread__m_pTransitionFrame], rbx + + test dword ptr [C_VAR(RhpTrapThreads)], TrapThreadsFlags_TrapThreads + pop_nonvol_reg rbx + jnz 0f // forward branch - predicted not taken + ret +0: + jmp C_FUNC(RhpWaitForSuspend2) +NESTED_END RhpPInvoke, _TEXT + + +// +// RhpPInvokeReturn +// +// IN: RDI: address of pinvoke frame +// +LEAF_ENTRY RhpPInvokeReturn, _TEXT + mov rsi, [rdi + OFFSETOF__PInvokeTransitionFrame__m_pThread] + mov qword ptr [rsi + OFFSETOF__Thread__m_pTransitionFrame], 0 + cmp dword ptr [C_VAR(RhpTrapThreads)], TrapThreadsFlags_None + jne 0f // forward branch - predicted not taken + ret +0: + // passing transition frame pointer in rdi + jmp C_FUNC(RhpWaitForGC2) +LEAF_END RhpPInvokeReturn, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.asm b/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.asm new file mode 100644 index 0000000000000..c9f93df834a20 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/PInvoke.asm @@ -0,0 +1,329 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include asmmacros.inc + +extern RhpReversePInvokeBadTransition : proc + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn +;; +;; +;; INPUT: none +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpWaitForSuspend, _TEXT + push_vol_reg rax + alloc_stack 60h + + ; save the arg regs in the caller's scratch space + save_reg_postrsp rcx, 70h + save_reg_postrsp rdx, 78h + save_reg_postrsp r8, 80h + save_reg_postrsp r9, 88h + + ; save the FP arg regs in our stack frame + save_xmm128_postrsp xmm0, (20h + 0*10h) + save_xmm128_postrsp xmm1, (20h + 1*10h) + save_xmm128_postrsp xmm2, (20h + 2*10h) + save_xmm128_postrsp xmm3, (20h + 3*10h) + + END_PROLOGUE + + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jz NoWait + + call RhpWaitForSuspend2 + +NoWait: + movdqa xmm0, [rsp + 20h + 0*10h] + movdqa xmm1, [rsp + 20h + 1*10h] + movdqa xmm2, [rsp + 20h + 2*10h] + movdqa xmm3, [rsp + 20h + 3*10h] + + mov rcx, [rsp + 70h] + mov rdx, [rsp + 78h] + mov r8, [rsp + 80h] + mov r9, [rsp + 88h] + + add rsp, 60h + pop rax + ret + +NESTED_END RhpWaitForSuspend, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGCNoAbort -- rare path for RhpPInvokeReturn +;; +;; +;; INPUT: RCX: transition frame +;; +;; TRASHES: RCX, RDX, R8, R9, R10, R11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT + push_vol_reg rax ; don't trash the integer return value + 
alloc_stack 30h + movdqa [rsp + 20h], xmm0 ; don't trash the FP return value + END_PROLOGUE + + mov rdx, [rcx + OFFSETOF__PInvokeTransitionFrame__m_pThread] + + test dword ptr [rdx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + jnz Done + + ; passing transition frame pointer in rcx + call RhpWaitForGC2 + +Done: + movdqa xmm0, [rsp + 20h] + add rsp, 30h + pop rax + ret + +NESTED_END RhpWaitForGCNoAbort, _TEXT + +EXTERN RhpThrowHwEx : PROC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGC -- rare path for RhpPInvokeReturn +;; +;; +;; INPUT: RCX: transition frame +;; +;; TRASHES: RCX, RDX, R8, R9, R10, R11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpWaitForGC, _TEXT + push_nonvol_reg rbx + END_PROLOGUE + + mov rbx, rcx + + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jz NoWait + + call RhpWaitForGCNoAbort +NoWait: + test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress + jz Done + test dword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT + jz Done + + mov rcx, STATUS_REDHAWK_THREAD_ABORT + pop rbx + pop rdx ; return address as exception RIP + jmp RhpThrowHwEx ; Throw the ThreadAbortException as a special kind of hardware exception + +Done: + pop rbx + ret + +NESTED_END RhpWaitForGC, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvoke +;; +;; +;; INCOMING: RAX -- address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 8: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;; PRESERVES: RCX, RDX, R8, R9 -- need to preserve these because the caller assumes they aren't trashed +;; +;; TRASHES: RAX, R10, R11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +LEAF_ENTRY RhpReversePInvoke, _TEXT + ;; R10 = GetThread(), TRASHES R11 + INLINE_GETTHREAD r10, r11 + mov [rax + 8], r10 ; save thread pointer for RhpReversePInvokeReturn + + test dword ptr [r10 + OFFSETOF__Thread__m_ThreadStateFlags], TSF_Attached + jz AttachThread + + ;; + ;; Check for the correct mode. This is accessible via various odd things that we cannot completely + ;; prevent such as : + ;; 1) Registering a reverse pinvoke entrypoint as a vectored exception handler + ;; 2) Performing a managed delegate invoke on a reverse pinvoke delegate. + ;; + cmp qword ptr [r10 + OFFSETOF__Thread__m_pTransitionFrame], 0 + je CheckBadTransition + + ; rax: reverse pinvoke frame + ; r10: thread + + ; Save previous TransitionFrame prior to making the mode transition so that it is always valid + ; whenever we might attempt to hijack this thread. + mov r11, [r10 + OFFSETOF__Thread__m_pTransitionFrame] + mov [rax], r11 + + mov qword ptr [r10 + OFFSETOF__Thread__m_pTransitionFrame], 0 + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz TrapThread + + ret + +CheckBadTransition: + ;; Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. This allows us to have + ;; [UnmanagedCallersOnly] methods that are called via the "restricted GC callouts" as well as from native, + ;; which is necessary because the methods are CCW vtable methods on interfaces passed to native. 
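Taken together, the attach, trap, and bad-transition checks in RhpReversePInvoke boil down to a small decision tree. A hedged C++ condensation follows; the enum and parameter names are invented for illustration, while TSF_Attached, TSF_DoNotTriggerGc, the saved transition frame, and the trap flag are the real inputs used by the assembly.

// Condensed decision logic of the reverse P/Invoke entry helper (names are illustrative).
enum class ReverseEntrySketch { Proceed, AttachThread, TrapThread, BadTransition };

inline ReverseEntrySketch ReversePInvokeEnterSketch(bool tsfAttached, bool tsfDoNotTriggerGc,
                                                    void* currentTransitionFrame, bool trapThreadsSet)
{
    if (!tsfAttached)
        return ReverseEntrySketch::AttachThread;        // first managed entry on this thread

    if (currentTransitionFrame == nullptr)
    {
        // Thread is already in cooperative mode. Only legal under TSF_DoNotTriggerGc
        // (restricted GC callouts, CCW vtable methods); anything else is a bad transition.
        return tsfDoNotTriggerGc ? ReverseEntrySketch::Proceed
                                 : ReverseEntrySketch::BadTransition;
    }

    // Normal path: the helper saves currentTransitionFrame into the reverse-pinvoke frame and
    // clears the thread's transition frame; a pending suspension diverts to the trap helper.
    return trapThreadsSet ? ReverseEntrySketch::TrapThread : ReverseEntrySketch::Proceed;
}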
+ test dword ptr [r10 + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + jz BadTransition + + ;; RhpTrapThreads will always be set in this case, so we must skip that check. We must be sure to + ;; zero-out our 'previous transition frame' state first, however. + mov qword ptr [rax], 0 + ret + +TrapThread: + ;; put the previous frame back (sets us back to preemptive mode) + mov qword ptr [r10 + OFFSETOF__Thread__m_pTransitionFrame], r11 + +AttachThread: + ; passing address of reverse pinvoke frame in rax + jmp RhpReversePInvokeAttachOrTrapThread + +BadTransition: + mov rcx, qword ptr [rsp] ; arg <- return address + jmp RhpReversePInvokeBadTransition + +LEAF_END RhpReversePInvoke, _TEXT + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeAttachOrTrapThread +;; +;; +;; INCOMING: RAX -- address of reverse pinvoke frame +;; +;; PRESERVES: RCX, RDX, R8, R9 -- need to preserve these because the caller assumes they aren't trashed +;; +;; TRASHES: RAX, R10, R11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread, _TEXT + alloc_stack 88h ; alloc scratch area and frame + + ; save the integer arg regs + save_reg_postrsp rcx, (20h + 0*8) + save_reg_postrsp rdx, (20h + 1*8) + save_reg_postrsp r8, (20h + 2*8) + save_reg_postrsp r9, (20h + 3*8) + + ; save the FP arg regs + save_xmm128_postrsp xmm0, (20h + 4*8 + 0*10h) + save_xmm128_postrsp xmm1, (20h + 4*8 + 1*10h) + save_xmm128_postrsp xmm2, (20h + 4*8 + 2*10h) + save_xmm128_postrsp xmm3, (20h + 4*8 + 3*10h) + + END_PROLOGUE + + mov rcx, rax ; rcx <- reverse pinvoke frame + call RhpReversePInvokeAttachOrTrapThread2 + + movdqa xmm0, [rsp + (20h + 4*8 + 0*10h)] + movdqa xmm1, [rsp + (20h + 4*8 + 1*10h)] + movdqa xmm2, [rsp + (20h + 4*8 + 2*10h)] + movdqa xmm3, [rsp + (20h + 4*8 + 3*10h)] + + mov rcx, [rsp + (20h + 0*8)] + mov rdx, [rsp + (20h + 1*8)] + mov r8, [rsp + (20h + 2*8)] + mov r9, [rsp + (20h + 3*8)] + + ;; epilog + add rsp, 88h + ret + +NESTED_END RhpReversePInvokeAttachOrTrapThread, _TEXT + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeReturn +;; +;; IN: RCX: address of reverse pinvoke frame +;; +;; TRASHES: RCX, RDX, R10, R11 +;; +;; PRESERVES: RAX (return value) +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +LEAF_ENTRY RhpReversePInvokeReturn, _TEXT + mov rdx, [rcx + 8] ; get Thread pointer + mov rcx, [rcx + 0] ; get previous M->U transition frame + + mov [rdx + OFFSETOF__Thread__m_pTransitionFrame], rcx + cmp [RhpTrapThreads], TrapThreadsFlags_None + jne RhpWaitForSuspend + ret +LEAF_END RhpReversePInvokeReturn, _TEXT + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpPInvoke +;; +;; IN: RCX: address of pinvoke frame +;; +;; TRASHES: R10, R11 +;; +;; This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite. +;; The codegenerator must treat the callsite of this helper as GC triggering and generate the GC info for it. +;; Also, the codegenerator must ensure that there are no live GC references in callee saved registers. 
+;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +LEAF_ENTRY RhpPInvoke, _TEXT + ;; R10 = GetThread(), TRASHES R11 + INLINE_GETTHREAD r10, r11 + + mov r11, [rsp] ; r11 <- return address + mov qword ptr [rcx + OFFSETOF__PInvokeTransitionFrame__m_pThread], r10 + mov qword ptr [rcx + OFFSETOF__PInvokeTransitionFrame__m_FramePointer], rbp + mov qword ptr [rcx + OFFSETOF__PInvokeTransitionFrame__m_RIP], r11 + + lea r11, [rsp + 8] ; r11 <- caller SP + mov dword ptr [rcx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_SAVE_RSP + mov qword ptr [rcx + OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs], r11 + + mov qword ptr [r10 + OFFSETOF__Thread__m_pTransitionFrame], rcx + + cmp [RhpTrapThreads], TrapThreadsFlags_None + jne @F ; forward branch - predicted not taken + ret +@@: + jmp RhpWaitForSuspend +LEAF_END RhpPInvoke, _TEXT + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpPInvokeReturn +;; +;; IN: RCX: address of pinvoke frame +;; +;; TRASHES: RCX, RDX, R8, R9, R10, R11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +LEAF_ENTRY RhpPInvokeReturn, _TEXT + mov rdx, [rcx + OFFSETOF__PInvokeTransitionFrame__m_pThread] + mov qword ptr [rdx + OFFSETOF__Thread__m_pTransitionFrame], 0 + cmp [RhpTrapThreads], TrapThreadsFlags_None + jne @F ; forward branch - predicted not taken + ret +@@: + ; passing transition frame pointer in rcx + jmp RhpWaitForGC +LEAF_END RhpPInvokeReturn, _TEXT + + +END diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.S b/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.S new file mode 100644 index 0000000000000..8fa74c29616dd --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.S @@ -0,0 +1,82 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +// trick to avoid PLT relocation at runtime which corrupts registers +#define REL_C_FUNC(name) C_FUNC(name)@gotpcrel + + +// Macro that generates a stub consuming a cache with the given number of entries. +.macro DEFINE_INTERFACE_DISPATCH_STUB entries + +LEAF_ENTRY RhpInterfaceDispatch\entries, _TEXT + + // r10 currently contains the indirection cell address. + // load r11 to point to the cache block. + mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the EEType from the object instance in rdi. + mov rax, [rdi] + + CurrentOffset = OFFSETOF__InterfaceDispatchCache__m_rgEntries + + // For each entry in the cache, see if its EEType type matches the EEType in rax. + // If so, call the second cache entry. If not, skip the InterfaceDispatchCacheEntry. + .rept \entries + cmp rax, [r11 + CurrentOffset] + jne 0f + jmp [r11 + CurrentOffset + 8] + 0: + CurrentOffset = CurrentOffset + 16 + .endr + + // r10 still contains the the indirection cell address. + + jmp C_FUNC(RhpInterfaceDispatchSlow) +LEAF_END RhpInterfaceDispatch\entries, _TEXT + +.endm // DEFINE_INTERFACE_DISPATCH_STUB + + + +// Define all the stub routines we currently need. +// +// The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed. 
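Each generated stub is a fixed-size linear probe over the dispatch cell's cache. A rough C++ rendering of what an N-entry stub does is shown below; the struct shapes are illustrative stand-ins (the real InterfaceDispatchCell and cache layouts come from AsmOffsets.cpp), and the return value models the tail jump the stub performs.

#include <cstddef>

struct CacheEntrySketch { void* methodTable; void* target; };      // one 16-byte cache entry, as in the stubs
struct DispatchCellSketch
{
    CacheEntrySketch* cache;        // stands in for m_pCache
    size_t            entryCount;
};

using ResolveFnSketch = void* (*)(DispatchCellSketch* cell, void* obj);  // cache-miss resolution path

// Read the object's MethodTable, probe the cache linearly, take the cached target on a hit,
// otherwise fall into the slow path (the RhpInterfaceDispatchSlow / RhpCidResolve route).
inline void* InterfaceDispatchSketch(DispatchCellSketch* cell, void* obj, ResolveFnSketch slowPath)
{
    void* methodTable = *static_cast<void**>(obj);   // first pointer-sized field of the object
    for (size_t i = 0; i < cell->entryCount; i++)
    {
        if (cell->cache[i].methodTable == methodTable)
            return cell->cache[i].target;            // the stub tail-jumps here instead of returning
    }
    return slowPath(cell, obj);
}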
+// If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo +// +DEFINE_INTERFACE_DISPATCH_STUB 1 +DEFINE_INTERFACE_DISPATCH_STUB 2 +DEFINE_INTERFACE_DISPATCH_STUB 4 +DEFINE_INTERFACE_DISPATCH_STUB 8 +DEFINE_INTERFACE_DISPATCH_STUB 16 +DEFINE_INTERFACE_DISPATCH_STUB 32 +DEFINE_INTERFACE_DISPATCH_STUB 64 + +// Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // UNIXTODO: Implement this function + int 3 +LEAF_END RhpVTableOffsetDispatch, _TEXT + +// Initial dispatch on an interface when we don't have a cache yet. +LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT +ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + + // Just tail call to the cache miss helper. + jmp C_FUNC(RhpInterfaceDispatchSlow) + +LEAF_END RhpInitialInterfaceDispatch, _TEXT + +// Cache miss case, call the runtime to resolve the target and update the cache. +// Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // r10 contains indirection cell address, move to r11 where it will be passed by + // the universal transition thunk as an argument to RhpCidResolve + mov r11, r10 + mov r10, [rip + REL_C_FUNC(RhpCidResolve)] + jmp qword ptr [rip + REL_C_FUNC(RhpUniversalTransition_DebugStepTailCall)] + +LEAF_END RhpInterfaceDispatchSlow, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.asm b/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.asm new file mode 100644 index 0000000000000..11912a958716f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/StubDispatch.asm @@ -0,0 +1,108 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + + +ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + +EXTERN RhpCidResolve : PROC +EXTERN RhpUniversalTransition_DebugStepTailCall : PROC + +;; Macro that generates code to check a single cache entry. +CHECK_CACHE_ENTRY macro entry +NextLabel textequ @CatStr( Attempt, %entry+1 ) + cmp rax, [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16)] + jne NextLabel + jmp qword ptr [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16) + 8] +NextLabel: +endm + + +;; Macro that generates a stub consuming a cache with the given number of entries. +DEFINE_INTERFACE_DISPATCH_STUB macro entries + +StubName textequ @CatStr( RhpInterfaceDispatch, entries ) + +LEAF_ENTRY StubName, _TEXT + +;EXTERN CID_g_cInterfaceDispatches : DWORD + ;inc [CID_g_cInterfaceDispatches] + + ;; r10 currently contains the indirection cell address. + ;; load r11 to point to the cache block. + mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in rcx. + mov rax, [rcx] + +CurrentEntry = 0 + while CurrentEntry lt entries + CHECK_CACHE_ENTRY %CurrentEntry +CurrentEntry = CurrentEntry + 1 + endm + + ;; r10 still contains the the indirection cell address. + + jmp RhpInterfaceDispatchSlow + +LEAF_END StubName, _TEXT + + endm ;; DEFINE_INTERFACE_DISPATCH_STUB + + +;; Define all the stub routines we currently need. +;; +;; The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed. 
+;; If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo +;; +DEFINE_INTERFACE_DISPATCH_STUB 1 +DEFINE_INTERFACE_DISPATCH_STUB 2 +DEFINE_INTERFACE_DISPATCH_STUB 4 +DEFINE_INTERFACE_DISPATCH_STUB 8 +DEFINE_INTERFACE_DISPATCH_STUB 16 +DEFINE_INTERFACE_DISPATCH_STUB 32 +DEFINE_INTERFACE_DISPATCH_STUB 64 + +;; Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + ;; r10 currently contains the indirection cell address. + ;; load rax to point to the vtable offset (which is stored in the m_pCache field). + mov rax, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable of what we want to dereference + add rax, [rcx] + + ;; Load the target address of the vtable into rax + mov rax, [rax] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + + +;; Initial dispatch on an interface when we don't have a cache yet. +LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT +ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + + ;; Just tail call to the cache miss helper. + jmp RhpInterfaceDispatchSlow + +LEAF_END RhpInitialInterfaceDispatch, _TEXT + +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + ;; r10 contains indirection cell address, move to r11 where it will be passed by + ;; the universal transition thunk as an argument to RhpCidResolve + mov r11, r10 + lea r10, RhpCidResolve + jmp RhpUniversalTransition_DebugStepTailCall + +LEAF_END RhpInterfaceDispatchSlow, _TEXT + + +endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/ThunkPoolThunks.asm b/src/coreclr/src/nativeaot/Runtime/amd64/ThunkPoolThunks.asm new file mode 100644 index 0000000000000..e2700d7fda390 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/ThunkPoolThunks.asm @@ -0,0 +1,291 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +;; ----------------------------------------------------------------------------------------------------------- +;;#include "asmmacros.inc" +;; ----------------------------------------------------------------------------------------------------------- + +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + align 16 + public Name + Name proc +endm + +NAMED_LEAF_ENTRY macro Name, Section, SectionAlias + Section segment para alias(SectionAlias) 'CODE' + align 16 + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + +NAMED_READONLY_DATA_SECTION macro Section, SectionAlias + Section segment alias(SectionAlias) read 'DATA' + align 16 + DQ 0 + Section ends +endm + +NAMED_READWRITE_DATA_SECTION macro Section, SectionAlias + Section segment alias(SectionAlias) read write 'DATA' + align 16 + DQ 0 + Section ends +endm + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; STUBS & DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +THUNK_CODESIZE equ 10h ;; 7-byte lea, 6-byte jmp, 3 bytes of nops +THUNK_DATASIZE equ 010h ;; 2 qwords + +THUNK_POOL_NUM_THUNKS_PER_PAGE equ 0FAh ;; 250 thunks per page + +PAGE_SIZE equ 01000h ;; 4K +POINTER_SIZE equ 08h + + +LOAD_DATA_ADDRESS macro groupIndex, index, thunkPool + ALIGN 10h ;; make sure we align to 16-byte boundary for CFG table + + ;; set r10 to begining of data page : r10 <- [thunkPool] + PAGE_SIZE + ;; fix offset of the data : r10 <- r10 + (THUNK_DATASIZE * current thunk's index) + lea r10, [thunkPool + PAGE_SIZE + (groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * index)] +endm + +JUMP_TO_COMMON macro groupIndex, index, thunkPool + ;; jump to the location pointed at by the last qword in the data page + jmp qword ptr[thunkPool + PAGE_SIZE + PAGE_SIZE - POINTER_SIZE] +endm + +TenThunks macro groupIndex, thunkPool + ;; Each thunk will load the address of its corresponding data (from the page that immediately follows) + ;; and call a common stub. The address of the common stub is setup by the caller (first qword + ;; in the thunks data section, hence the +8's below) depending on the 'kind' of thunks needed (interop, + ;; fat function pointers, etc...) + + ;; Each data block used by a thunk consists of two qword values: + ;; - Context: some value given to the thunk as context (passed in r10). Example for fat-fptrs: context = generic dictionary + ;; - Target : target code that the thunk eventually jumps to. 
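Put differently, thunk i's data lives at a fixed offset in the page that immediately follows the code page, and every thunk funnels through a common stub published in a designated slot of the data page (the slot JUMP_TO_COMMON reads, the last qword of that page in this layout). A small C++ sketch of the addressing, with constants mirroring the equates above and invented struct and function names:

#include <cstdint>

constexpr uintptr_t kPoolPageSize  = 0x1000;   // PAGE_SIZE above
constexpr uintptr_t kThunkDataSize = 0x10;     // THUNK_DATASIZE: two qwords per thunk

// Each thunk's data block, as described above: a context value and a jump target.
struct ThunkDataSketch { void* context; void* target; };

// Address of thunk i's data block: the data page immediately follows the code page.
inline ThunkDataSketch* GetThunkDataSketch(uintptr_t thunkPoolBase, unsigned thunkIndex)
{
    uintptr_t dataPage = thunkPoolBase + kPoolPageSize;
    return reinterpret_cast<ThunkDataSketch*>(dataPage + thunkIndex * kThunkDataSize);
}

// The common stub that every thunk in the block jumps through, read from the data page.
inline void* GetCommonStubSketch(uintptr_t thunkPoolBase)
{
    uintptr_t dataPage = thunkPoolBase + kPoolPageSize;
    return *reinterpret_cast<void**>(dataPage + kPoolPageSize - sizeof(void*));
}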
+ + LOAD_DATA_ADDRESS groupIndex,0,thunkPool + JUMP_TO_COMMON groupIndex,0,thunkPool + + LOAD_DATA_ADDRESS groupIndex,1,thunkPool + JUMP_TO_COMMON groupIndex,1,thunkPool + + LOAD_DATA_ADDRESS groupIndex,2,thunkPool + JUMP_TO_COMMON groupIndex,2,thunkPool + + LOAD_DATA_ADDRESS groupIndex,3,thunkPool + JUMP_TO_COMMON groupIndex,3,thunkPool + + LOAD_DATA_ADDRESS groupIndex,4,thunkPool + JUMP_TO_COMMON groupIndex,4,thunkPool + + LOAD_DATA_ADDRESS groupIndex,5,thunkPool + JUMP_TO_COMMON groupIndex,5,thunkPool + + LOAD_DATA_ADDRESS groupIndex,6,thunkPool + JUMP_TO_COMMON groupIndex,6,thunkPool + + LOAD_DATA_ADDRESS groupIndex,7,thunkPool + JUMP_TO_COMMON groupIndex,7,thunkPool + + LOAD_DATA_ADDRESS groupIndex,8,thunkPool + JUMP_TO_COMMON groupIndex,8,thunkPool + + LOAD_DATA_ADDRESS groupIndex,9,thunkPool + JUMP_TO_COMMON groupIndex,9,thunkPool +endm + +THUNKS_PAGE_BLOCK macro thunkPool + TenThunks 0,thunkPool + TenThunks 1,thunkPool + TenThunks 2,thunkPool + TenThunks 3,thunkPool + TenThunks 4,thunkPool + TenThunks 5,thunkPool + TenThunks 6,thunkPool + TenThunks 7,thunkPool + TenThunks 8,thunkPool + TenThunks 9,thunkPool + TenThunks 10,thunkPool + TenThunks 11,thunkPool + TenThunks 12,thunkPool + TenThunks 13,thunkPool + TenThunks 14,thunkPool + TenThunks 15,thunkPool + TenThunks 16,thunkPool + TenThunks 17,thunkPool + TenThunks 18,thunkPool + TenThunks 19,thunkPool + TenThunks 20,thunkPool + TenThunks 21,thunkPool + TenThunks 22,thunkPool + TenThunks 23,thunkPool + TenThunks 24,thunkPool +endm + +;; +;; The first thunks section should be 64K aligned because it can get +;; mapped multiple times in memory, and mapping works on allocation +;; granularity boundaries (we don't want to map more than what we need) +;; +;; The easiest way to do so is by having the thunks section at the +;; first 64K aligned virtual address in the binary. We provide a section +;; layout file to the linker to tell it how to layout the thunks sections +;; that we care about. (ndp\rh\src\runtime\DLLs\app\mrt100_app_sectionlayout.txt) +;; +;; The PE spec says images cannot have gaps between sections (other +;; than what is required by the section alignment value in the header), +;; therefore we need a couple of padding data sections (otherwise the +;; OS will not load the image). 
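Because each block is exactly one code page followed by one data page, converting between a thunk stub address and its data block, as the RhpGetThunkDataBlockAddress and RhpGetThunkStubsBlockAddress helpers near the end of this file do, is plain page arithmetic. A hedged C++ restatement of that arithmetic (names invented, page size taken from the equate above):

#include <cstdint>

constexpr uintptr_t kThunkPageSize = 0x1000;

inline uintptr_t ThunkDataBlockFromStubSketch(uintptr_t stubAddress)
{
    // Round down to the start of the code page, then step to the page after it.
    return (stubAddress & ~(kThunkPageSize - 1)) + kThunkPageSize;
}

inline uintptr_t ThunkStubsBlockFromDataSketch(uintptr_t dataAddress)
{
    // Round down to the start of the data page, then step to the page before it.
    return (dataAddress & ~(kThunkPageSize - 1)) - kThunkPageSize;
}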
+;; + +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment0, ".pad0" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment1, ".pad1" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment2, ".pad2" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment3, ".pad3" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment4, ".pad4" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment5, ".pad5" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment6, ".pad6" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment7, ".pad7" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment8, ".pad8" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment9, ".pad9" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment10, ".pad10" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment11, ".pad11" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment12, ".pad12" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment13, ".pad13" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment14, ".pad14" + +;; +;; Thunk Stubs +;; NOTE: Keep number of blocks in sync with macro/constant named 'NUM_THUNK_BLOCKS' in: +;; - ndp\FxCore\src\System.Private.CoreLib\System\Runtime\InteropServices\ThunkPool.cs +;; - ndp\rh\src\tools\rhbind\zapimage.h +;; +NAMED_LEAF_ENTRY ThunkPool, TKS0, ".tks0" + THUNKS_PAGE_BLOCK ThunkPool +LEAF_END ThunkPool, TKS0 + +NAMED_READWRITE_DATA_SECTION ThunkData0, ".tkd0" + +NAMED_LEAF_ENTRY ThunkPool1, TKS1, ".tks1" + THUNKS_PAGE_BLOCK ThunkPool1 +LEAF_END ThunkPool1, TKS1 + +NAMED_READWRITE_DATA_SECTION ThunkData1, ".tkd1" + +NAMED_LEAF_ENTRY ThunkPool2, TKS2, ".tks2" + THUNKS_PAGE_BLOCK ThunkPool2 +LEAF_END ThunkPool2, TKS2 + +NAMED_READWRITE_DATA_SECTION ThunkData2, ".tkd2" + +NAMED_LEAF_ENTRY ThunkPool3, TKS3, ".tks3" + THUNKS_PAGE_BLOCK ThunkPool3 +LEAF_END ThunkPool3, TKS3 + +NAMED_READWRITE_DATA_SECTION ThunkData3, ".tkd3" + +NAMED_LEAF_ENTRY ThunkPool4, TKS4, ".tks4" + THUNKS_PAGE_BLOCK ThunkPool4 +LEAF_END ThunkPool4, TKS4 + +NAMED_READWRITE_DATA_SECTION ThunkData4, ".tkd4" + +NAMED_LEAF_ENTRY ThunkPool5, TKS5, ".tks5" + THUNKS_PAGE_BLOCK ThunkPool5 +LEAF_END ThunkPool5, TKS5 + +NAMED_READWRITE_DATA_SECTION ThunkData5, ".tkd5" + +NAMED_LEAF_ENTRY ThunkPool6, TKS6, ".tks6" + THUNKS_PAGE_BLOCK ThunkPool6 +LEAF_END ThunkPool6, TKS6 + +NAMED_READWRITE_DATA_SECTION ThunkData6, ".tkd6" + +NAMED_LEAF_ENTRY ThunkPool7, TKS7, ".tks7" + THUNKS_PAGE_BLOCK ThunkPool7 +LEAF_END ThunkPool7, TKS7 + +NAMED_READWRITE_DATA_SECTION ThunkData7, ".tkd7" + +;; +;; IntPtr RhpGetThunksBase() +;; +LEAF_ENTRY RhpGetThunksBase, _TEXT + ;; Return the address of the first thunk pool to the caller (this is really the base address) + lea rax, [ThunkPool] + ret +LEAF_END RhpGetThunksBase, _TEXT + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; General Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; int RhpGetNumThunksPerBlock() +;; +LEAF_ENTRY RhpGetNumThunksPerBlock, _TEXT + mov rax, THUNK_POOL_NUM_THUNKS_PER_PAGE + ret +LEAF_END RhpGetNumThunksPerBlock, _TEXT + +;; +;; int RhpGetThunkSize() +;; +LEAF_ENTRY RhpGetThunkSize, _TEXT + mov rax, THUNK_CODESIZE + ret +LEAF_END RhpGetThunkSize, _TEXT + +;; +;; int RhpGetNumThunkBlocksPerMapping() +;; +LEAF_ENTRY RhpGetNumThunkBlocksPerMapping, _TEXT + mov rax, 8 + ret +LEAF_END RhpGetNumThunkBlocksPerMapping, _TEXT + +;; +;; int RhpGetThunkBlockSize +;; +LEAF_ENTRY RhpGetThunkBlockSize, _TEXT + mov rax, PAGE_SIZE * 2 + ret +LEAF_END RhpGetThunkBlockSize, _TEXT + +;; +;; IntPtr RhpGetThunkDataBlockAddress(IntPtr thunkStubAddress) +;; +LEAF_ENTRY RhpGetThunkDataBlockAddress, _TEXT + mov rax, rcx + mov rcx, PAGE_SIZE - 
1 + not rcx + and rax, rcx + add rax, PAGE_SIZE + ret +LEAF_END RhpGetThunkDataBlockAddress, _TEXT + +;; +;; IntPtr RhpGetThunkStubsBlockAddress(IntPtr thunkDataAddress) +;; +LEAF_ENTRY RhpGetThunkStubsBlockAddress, _TEXT + mov rax, rcx + mov rcx, PAGE_SIZE - 1 + not rcx + and rax, rcx + sub rax, PAGE_SIZE + ret +LEAF_END RhpGetThunkStubsBlockAddress, _TEXT + + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.S b/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.S new file mode 100644 index 0000000000000..9ad56f8965438 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.S @@ -0,0 +1,162 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +#ifdef FEATURE_DYNAMIC_CODE + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS 1 +#endif + +#define SIZEOF_RETADDR 8 + +#define SIZEOF_RETURN_BLOCK 0x10 // for 16 bytes of conservatively reported space that the callee can + // use to manage the return value that the call eventually generates + +#define SIZEOF_ARGUMENT_REGISTERS 0x30 // Callee register spill + +// +// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +// +// SIZEOF_RETADDR +// SIZEOF_ARGUMENT_REGISTERS +// SIZEOF_RETURN_BLOCK +// SIZEOF_FP_REGS +// + +#define DISTANCE_FROM_CHILDSP_TO_FP_REGS 0 + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK SIZEOF_FP_REGS + +#define DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS (SIZEOF_FP_REGS + SIZEOF_RETURN_BLOCK) + +#define DISTANCE_FROM_CHILDSP_TO_RETADDR (SIZEOF_FP_REGS + SIZEOF_RETURN_BLOCK + SIZEOF_ARGUMENT_REGISTERS + 8) + +// +// Defines an assembly thunk used to make a transition from managed code to a callee, +// then (based on the return value from the callee), either returning or jumping to +// a new location while preserving the input arguments. The usage of this thunk also +// ensures arguments passed are properly reported. +// +// TODO: This code currently only tailcalls, and does not return. +// +// Inputs: +// rdi, esi, rcx, rdx, r8, r9, stack space: arguments as normal +// r10: The location of the target code the UniversalTransition thunk will call +// r11: The only parameter to the target function (passed in rdx to callee) +// + +// +// Frame layout is: +// +// {StackPassedArgs} ChildSP+0D0 CallerSP+000 +// {CallerRetaddr} ChildSP+0C8 CallerSP-008 +// {AlignmentPad (0x8 bytes)} ChildSP+0C0 CallerSP-010 +// {IntArgRegs (0x30 bytes)} ChildSP+090 CallerSP-040 +// {ReturnBlock (0x10 bytes)} ChildSP+080 CallerSP-050 +// {FpArgRegs (xmm0-xmm7) (0x80 bytes)} ChildSP+000 CallerSP-0D0 +// {CalleeRetaddr} ChildSP-008 CallerSP-0D8 +// +// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +// must be updated as well. +// +// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +// FpArgRegs. +// +// NOTE: The stack walker guarantees that conservative GC reporting will be applied to +// everything between the base of the ReturnBlock and the top of the StackPassedArgs. 
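In C++ terms, the contract this thunk implements is roughly the sketch below. The function shapes are invented for illustration, but the flow matches the description above: the argument registers are spilled to a conservatively reported frame, the target receives a pointer to the ReturnBlock plus the single r11 argument, and the thunk then tail-jumps to whatever address the target returns.

// The target on this path (for example the cache-miss resolver) returns the address the
// thunk should transfer control to, rather than the thunk returning to its caller.
struct SavedArgsSketch
{
    void*  intArgs[6];     // rdi, rsi, rcx, rdx, r8, r9 on this ABI
    double fpArgs[8];      // xmm0-xmm7 (low halves shown for brevity)
};

using TransitionTargetFnSketch = void* (*)(void* returnBlock, void* argument);

inline void* UniversalTransitionSketch(TransitionTargetFnSketch target, void* argument,
                                       SavedArgsSketch* savedArgs, void* returnBlock)
{
    (void)savedArgs;                       // spilled to the frame so the GC can see and update them
    void* continuation = target(returnBlock, argument);
    return continuation;                   // the real thunk restores the registers and jumps here
}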
+// + +.macro UNIVERSAL_TRANSITION FunctionName + +NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler + + alloc_stack DISTANCE_FROM_CHILDSP_TO_RETADDR + + // save integer argument registers + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x00], rdi + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x08], rsi + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x10], rcx + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x18], rdx + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x20], r8 + mov [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x28], r9 + + // save fp argument registers + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x00], xmm0 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x10], xmm1 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x20], xmm2 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x30], xmm3 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x40], xmm4 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x50], xmm5 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x60], xmm6 + movdqa [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x70], xmm7 + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + + // Before calling out, trash all of the argument registers except the ones (rdi, rsi) that + // hold outgoing arguments. All of these registers have been saved to the transition + // frame, and the code at the call target is required to use only the transition frame + // copies when dispatching this call to the eventual callee. + + movsd xmm0, [C_VAR(RhpFpTrashValues) + 0x0] + movsd xmm1, [C_VAR(RhpFpTrashValues) + 0x8] + movsd xmm2, [C_VAR(RhpFpTrashValues) + 0x10] + movsd xmm3, [C_VAR(RhpFpTrashValues) + 0x18] + movsd xmm4, [C_VAR(RhpFpTrashValues) + 0x20] + movsd xmm5, [C_VAR(RhpFpTrashValues) + 0x28] + movsd xmm6, [C_VAR(RhpFpTrashValues) + 0x30] + movsd xmm7, [C_VAR(RhpFpTrashValues) + 0x38] + + mov rcx, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x10] + mov rdx, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x18] + mov r8, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x20] + mov r9, qword ptr [C_VAR(RhpIntegerTrashValues) + 0x28] + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + // + // Call out to the target, while storing and reporting arguments to the GC. + // + mov rsi, r11 + lea rdi, [rsp + DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK] + call r10 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom\FunctionName + + // restore fp argument registers + movdqa xmm0, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x00] + movdqa xmm1, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x10] + movdqa xmm2, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x20] + movdqa xmm3, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x30] + movdqa xmm4, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x40] + movdqa xmm5, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x50] + movdqa xmm6, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x60] + movdqa xmm7, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 0x70] + + // restore integer argument registers + mov rdi, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x00] + mov rsi, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x08] + mov rcx, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x10] + mov rdx, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x18] + mov r8, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x20] + mov r9, [rsp + DISTANCE_FROM_CHILDSP_TO_ARGUMENT_REGISTERS + 0x28] + + // Pop the space that was allocated between the ChildSP and the caller return address. 
+ free_stack DISTANCE_FROM_CHILDSP_TO_RETADDR + + jmp rax + +NESTED_END Rhp\FunctionName, _TEXT + +.endm // UNIVERSAL_TRANSITION + + // To enable proper step-in behavior in the debugger, we need to have two instances + // of the thunk. For the first one, the debugger steps into the call in the function, + // for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + +#endif // FEATURE_DYNAMIC_CODE diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.asm b/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.asm new file mode 100644 index 0000000000000..398c8bd487028 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/UniversalTransition.asm @@ -0,0 +1,167 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +ifdef FEATURE_DYNAMIC_CODE + +ifdef _DEBUG +TRASH_SAVED_ARGUMENT_REGISTERS equ 1 +else +TRASH_SAVED_ARGUMENT_REGISTERS equ 0 +endif + +if TRASH_SAVED_ARGUMENT_REGISTERS ne 0 +EXTERN RhpIntegerTrashValues : QWORD +EXTERN RhpFpTrashValues : QWORD +endif ;; TRASH_SAVED_ARGUMENT_REGISTERS + +SIZEOF_RETADDR equ 8h + +SIZEOF_ALIGNMENT_PADDING equ 8h + +SIZEOF_RETURN_BLOCK equ 10h ; for 16 bytes of conservatively reported space that the callee can + ; use to manage the return value that the call eventually generates + +SIZEOF_FP_REGS equ 40h ; xmm0-3 + +SIZEOF_OUT_REG_HOMES equ 20h ; Callee register spill + +; +; From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +; +; SIZEOF_RETADDR +; SIZEOF_ALIGNMENT_PADDING +; SIZEOF_RETURN_BLOCK +; SIZEOF_FP_REGS +; SIZEOF_OUT_REG_HOMES +; + +DISTANCE_FROM_CHILDSP_TO_FP_REGS equ SIZEOF_OUT_REG_HOMES + +DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK equ DISTANCE_FROM_CHILDSP_TO_FP_REGS + SIZEOF_FP_REGS + +DISTANCE_FROM_CHILDSP_TO_RETADDR equ DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK + SIZEOF_RETURN_BLOCK + SIZEOF_ALIGNMENT_PADDING + +DISTANCE_FROM_CHILDSP_TO_CALLERSP equ DISTANCE_FROM_CHILDSP_TO_RETADDR + SIZEOF_RETADDR + +.errnz DISTANCE_FROM_CHILDSP_TO_CALLERSP mod 16 + +;; +;; Defines an assembly thunk used to make a transition from managed code to a callee, +;; then (based on the return value from the callee), either returning or jumping to +;; a new location while preserving the input arguments. The usage of this thunk also +;; ensures arguments passed are properly reported. +;; +;; TODO: This code currently only tailcalls, and does not return. +;; +;; Inputs: +;; rcx, rdx, r8, r9, stack space: arguments as normal +;; r10: The location of the target code the UniversalTransition thunk will call +;; r11: The only parameter to the target function (passed in rdx to callee) +;; + +; +; Frame layout is: +; +; {StackPassedArgs} ChildSP+0a0 CallerSP+020 +; {IntArgRegs (rcx,rdx,r8,r9) (0x20 bytes)} ChildSP+080 CallerSP+000 +; {CallerRetaddr} ChildSP+078 CallerSP-008 +; {AlignmentPad (0x8 bytes)} ChildSP+070 CallerSP-010 +; {ReturnBlock (0x10 bytes)} ChildSP+060 CallerSP-020 +; {FpArgRegs (xmm0-xmm3) (0x40 bytes)} ChildSP+020 CallerSP-060 +; {CalleeArgumentHomes (0x20 bytes)} ChildSP+000 CallerSP-080 +; {CalleeRetaddr} ChildSP-008 CallerSP-088 +; +; NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +; must be updated as well. 
+; +; NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +; knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +; FpArgRegs. +; +; NOTE: The stack walker guarantees that conservative GC reporting will be applied to +; everything between the base of the ReturnBlock and the top of the StackPassedArgs. +; + +UNIVERSAL_TRANSITION macro FunctionName + +NESTED_ENTRY Rhp&FunctionName, _TEXT + + alloc_stack DISTANCE_FROM_CHILDSP_TO_RETADDR + + save_reg_postrsp rcx, 0h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + save_reg_postrsp rdx, 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + save_reg_postrsp r8, 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + save_reg_postrsp r9, 18h + DISTANCE_FROM_CHILDSP_TO_CALLERSP + + save_xmm128_postrsp xmm0, DISTANCE_FROM_CHILDSP_TO_FP_REGS + save_xmm128_postrsp xmm1, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 10h + save_xmm128_postrsp xmm2, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 20h + save_xmm128_postrsp xmm3, DISTANCE_FROM_CHILDSP_TO_FP_REGS + 30h + + END_PROLOGUE + +if TRASH_SAVED_ARGUMENT_REGISTERS ne 0 + + ; Before calling out, trash all of the argument registers except the ones (rcx, rdx) that + ; hold outgoing arguments. All of these registers have been saved to the transition + ; frame, and the code at the call target is required to use only the transition frame + ; copies when dispatching this call to the eventual callee. + + movsd xmm0, mmword ptr [RhpFpTrashValues + 0h] + movsd xmm1, mmword ptr [RhpFpTrashValues + 8h] + movsd xmm2, mmword ptr [RhpFpTrashValues + 10h] + movsd xmm3, mmword ptr [RhpFpTrashValues + 18h] + + mov r8, qword ptr [RhpIntegerTrashValues + 10h] + mov r9, qword ptr [RhpIntegerTrashValues + 18h] + +endif ; TRASH_SAVED_ARGUMENT_REGISTERS + + ; + ; Call out to the target, while storing and reporting arguments to the GC. + ; + mov rdx, r11 + lea rcx, [rsp + DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK] + call r10 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom&FunctionName + + ; We cannot make the label public as that tricks DIA stackwalker into thinking + ; it's the beginning of a method. For this reason we export the address + ; by means of an auxiliary variable. + + ; restore fp argument registers + movdqa xmm0, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS ] + movdqa xmm1, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 10h] + movdqa xmm2, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 20h] + movdqa xmm3, [rsp + DISTANCE_FROM_CHILDSP_TO_FP_REGS + 30h] + + ; restore integer argument registers + mov rcx, [rsp + 0h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + mov rdx, [rsp + 8h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + mov r8, [rsp + 10h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + mov r9, [rsp + 18h + DISTANCE_FROM_CHILDSP_TO_CALLERSP] + + ; epilog + nop + + ; Pop the space that was allocated between the ChildSP and the caller return address. + add rsp, DISTANCE_FROM_CHILDSP_TO_RETADDR + + TAILJMP_RAX + +NESTED_END Rhp&FunctionName, _TEXT + + endm + + ; To enable proper step-in behavior in the debugger, we need to have two instances + ; of the thunk. For the first one, the debugger steps into the call in the function, + ; for the other, it steps over it. 
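The debug-only trashing above is a defensive pattern: after the argument registers have been spilled to the transition frame, the live registers are overwritten with recognizable garbage so any callee that wrongly reads them, instead of the frame copies, fails loudly in debug builds. A minimal C++ analogue with invented names (the real trash sources are RhpIntegerTrashValues and RhpFpTrashValues):

#include <cstdint>

#ifndef NDEBUG
inline void TrashSavedArgumentsSketch(uint64_t* liveIntArgs, double* liveFpArgs,
                                      const uint64_t* intTrash, const double* fpTrash,
                                      int intCount, int fpCount)
{
    for (int i = 0; i < intCount; i++) liveIntArgs[i] = intTrash[i];  // like RhpIntegerTrashValues
    for (int i = 0; i < fpCount; i++)  liveFpArgs[i]  = fpTrash[i];   // like RhpFpTrashValues
}
#endif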
+ UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + +endif + +end diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.S b/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.S new file mode 100644 index 0000000000000..2927acd0241fc --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.S @@ -0,0 +1,287 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +#ifdef WRITE_BARRIER_CHECK + +.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG + + // If g_GCShadow is 0, don't perform the check. + cmp qword ptr [C_VAR(g_GCShadow)], 0 + je LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) + + // Save DESTREG since we're about to modify it (and we need the original value both within the macro and + // once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of + // the prolog inside a method without a frame. But given that this is only debug code and generally we + // shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier + // variants to set up frames. The compiler knows exactly which registers are trashed in the simple write + // barrier case, so we don't have any more scratch registers to play with (and doing so would only make + // things harder if at a later stage we want to allow multiple barrier versions based on the input + // registers). + push \DESTREG + + // Transform DESTREG into the equivalent address in the shadow heap. + sub \DESTREG, [C_VAR(g_lowest_address)] + jb LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) + add \DESTREG, [C_VAR(g_GCShadow)] + cmp \DESTREG, [C_VAR(g_GCShadowEnd)] + ja LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) + + // Update the shadow heap. + mov [\DESTREG], \REFREG + + // Now check that the real heap location still contains the value we just wrote into the shadow heap. This + // read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to + // recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock + // prefix). + xchg [rsp], \DESTREG + cmp [\DESTREG], \REFREG + jne LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG) + + // The original DESTREG value is now restored but the stack has a value (the shadow version of the + // location) pushed. Need to discard this push before we are done. + add rsp, 8 + jmp LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG): + // Someone went and updated the real heap. We need to invalidate the shadow location since we can't + // guarantee whose shadow update won. + + // Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an + // additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg + // variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 64-bit + // immediate and therefore must be moved into a register before it can be written to the shadow + // location. + xchg [rsp], \DESTREG + push \REFREG + movabs \REFREG, INVALIDGCVALUE + mov qword ptr [\DESTREG], \REFREG + pop \REFREG + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG): + // Restore original DESTREG value from the stack. 
+ pop \DESTREG + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG): +.endm + +#else // WRITE_BARRIER_CHECK + +.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG +.endm + +#endif // WRITE_BARRIER_CHECK + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +// name of the register that points to the location to be updated and the name of the register that holds the +// object reference (this should be in upper case as it's used in the definition of the name of the helper). +.macro DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG + + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW \BASENAME, \REFREG, rdi + + // If the reference is to an object that's not in an ephemeral generation we have no need to track it + // (since the object won't be collected or moved by an ephemeral collection). + cmp \REFREG, [C_VAR(g_ephemeral_low)] + jb LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + cmp \REFREG, [C_VAR(g_ephemeral_high)] + jae LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + + // We have a location on the GC heap being updated with a reference to an ephemeral object so we must + // track this write. The location address is translated into an offset in the card table bitmap. We set + // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + // the byte if it hasn't already been done since writes are expensive and impact scaling. + shr rdi, 11 + add rdi, [C_VAR(g_card_table)] + cmp byte ptr [rdi], 0x0FF + jne LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG) + +LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG): + ret + +// We get here if it's necessary to update the card table. +LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG): + mov byte ptr [rdi], 0x0FF + ret + +.endm + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. One argument is taken, the +// name of the register that will hold the object reference (this should be in upper case as it's used in the +// definition of the name of the helper). +.macro DEFINE_UNCHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME + +// Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard +// decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that +// location is in one of the other general registers determined by the value of REFREG. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpAssignRef\EXPORT_REG_NAME, _TEXT + + // Export the canonical write barrier under unqualified name as well + .ifc \REFREG, RSI + ALTERNATE_ENTRY RhpAssignRef + ALTERNATE_ENTRY RhpAssignRefAVLocation + .endif + + // Write the reference into the location. Note that we rely on the fact that no GC can occur between here + // and the card table update we may perform below. 
+ mov qword ptr [rdi], \REFREG + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, \REFREG + +LEAF_END RhpAssignRef\EXPORT_REG_NAME, _TEXT +.endm + +// One day we might have write barriers for all the possible argument registers but for now we have +// just one write barrier that assumes the input register is RSI. +DEFINE_UNCHECKED_WRITE_BARRIER RSI, ESI + +// +// Define the helpers used to implement the write barrier required when writing an object reference into a +// location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +// non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +// collection. +// + +.macro DEFINE_CHECKED_WRITE_BARRIER_CORE BASENAME, REFREG + + // The location being updated might not even lie in the GC heap (a handle or stack location for instance), + // in which case no write barrier is required. + cmp rdi, [C_VAR(g_lowest_address)] + jb LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + cmp rdi, [C_VAR(g_highest_address)] + jae LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE \BASENAME, \REFREG + +.endm + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. One argument is taken, the +// name of the register that will hold the object reference (this should be in upper case as it's used in the +// definition of the name of the helper). +.macro DEFINE_CHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME + +// Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard +// decoration). The location to be updated is always in RDI. The object reference that will be assigned into +// that location is in one of the other general registers determined by the value of REFREG. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT + + // Export the canonical write barrier under unqualified name as well + .ifc \REFREG, RSI + ALTERNATE_ENTRY RhpCheckedAssignRef + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + .endif + + // Write the reference into the location. Note that we rely on the fact that no GC can occur between here + // and the card table update we may perform below. + mov qword ptr [rdi], \REFREG + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, \REFREG + +LEAF_END RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT +.endm + +// One day we might have write barriers for all the possible argument registers but for now we have +// just one write barrier that assumes the input register is RSI. 
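Summarizing the two macros above in C++: the checked barrier is the unchecked barrier guarded by a heap-range test on the destination, and in WRITE_BARRIER_CHECK builds UPDATE_GC_SHADOW additionally mirrors the store into the shadow heap before the ephemeral test. The global names below are stand-ins for the GC exports referenced above; the RSI instantiation of the macro follows.

#include <cstdint>

extern uint8_t*  g_card_table_sketch;
extern uintptr_t g_lowest_address_sketch, g_highest_address_sketch;
extern uintptr_t g_ephemeral_low_sketch,  g_ephemeral_high_sketch;

// Conceptual equivalent of the checked assign-ref barrier: store the reference, then mark the
// card covering the destination only if a heap location now points at an ephemeral object.
inline void CheckedAssignRefSketch(void** dst, void* ref)
{
    *dst = ref;                                             // the store itself is unconditional

    uintptr_t loc = reinterpret_cast<uintptr_t>(dst);
    if (loc < g_lowest_address_sketch || loc >= g_highest_address_sketch)
        return;                                             // not a GC heap location (stack, handle, ...)

    uintptr_t obj = reinterpret_cast<uintptr_t>(ref);
    if (obj < g_ephemeral_low_sketch || obj >= g_ephemeral_high_sketch)
        return;                                             // target will not move in an ephemeral GC

    uint8_t* card = g_card_table_sketch + (loc >> 11);      // one card byte covers a 2KB heap range
    if (*card != 0xFF)                                      // skip redundant writes; they hurt scaling
        *card = 0xFF;
}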
+DEFINE_CHECKED_WRITE_BARRIER RSI, ESI + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT + mov rax, rdx +ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + lock cmpxchg [rdi], rsi + jne LOCAL_LABEL(RhpCheckedLockCmpXchg_NoBarrierRequired_RSI) + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, RSI + +LEAF_END RhpCheckedLockCmpXchg, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedXchg, _TEXT + + // Setup rax with the new object for the exchange, that way it will automatically hold the correct result + // afterwards and we can leave rdx unaltered ready for the GC write barrier below. + mov rax, rsi +ALTERNATE_ENTRY RhpCheckedXchgAVLocation + xchg [rdi], rax + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, RSI + +LEAF_END RhpCheckedXchg, _TEXT + +// +// RhpByRefAssignRef simulates movs instruction for object references. +// +// On entry: +// rdi: address of ref-field (assigned to) +// rsi: address of the data (source) +// rcx: be trashed +// +// On exit: +// rdi, rsi are incremented by 8, +// rcx: trashed +// +LEAF_ENTRY RhpByRefAssignRef, _TEXT + mov rcx, [rsi] + mov [rdi], rcx + + // Check whether the writes were even into the heap. If not there's no card update required. + cmp rdi, [C_VAR(g_lowest_address)] + jb LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + cmp rdi, [C_VAR(g_highest_address)] + jae LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW BASENAME, rcx, rdi + + // If the reference is to an object that's not in an ephemeral generation we have no need to track it + // (since the object won't be collected or moved by an ephemeral collection). + cmp rcx, [C_VAR(g_ephemeral_low)] + jb LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + cmp rcx, [C_VAR(g_ephemeral_high)] + jae LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + + // move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 0x8 + add rdi, 0x8 + + // We have a location on the GC heap being updated with a reference to an ephemeral object so we must + // track this write. The location address is translated into an offset in the card table bitmap. We set + // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + // the byte if it hasn't already been done since writes are expensive and impact scaling. + shr rcx, 11 + add rcx, [C_VAR(g_card_table)] + cmp byte ptr [rcx], 0x0FF + jne LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable) + ret + +// We get here if it's necessary to update the card table. 
+LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable): + mov byte ptr [rcx], 0x0FF + ret + +LOCAL_LABEL(RhpByRefAssignRef_NotInHeap): + // Increment the pointers before leaving + add rdi, 0x8 + add rsi, 0x8 + ret +LEAF_END RhpByRefAssignRef, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.asm b/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.asm new file mode 100644 index 0000000000000..67ecf7dc01728 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/amd64/WriteBarriers.asm @@ -0,0 +1,305 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used +;; during garbage collections to verify that object references where never written to the heap without using a +;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing +;; new references to the real heap. Since this can't be solved perfectly without critical sections around the +;; entire update process, we instead update the shadow location and then re-check the real location (as two +;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value +;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. +ifdef WRITE_BARRIER_CHECK + +g_GCShadow TEXTEQU +g_GCShadowEnd TEXTEQU +INVALIDGCVALUE EQU 0CCCCCCCDh + +EXTERN g_GCShadow : QWORD +EXTERN g_GCShadowEnd : QWORD + +UPDATE_GC_SHADOW macro BASENAME, REFREG, DESTREG + + ;; If g_GCShadow is 0, don't perform the check. + cmp g_GCShadow, 0 + je &BASENAME&_UpdateShadowHeap_Done_&REFREG& + + ;; Save DESTREG since we're about to modify it (and we need the original value both within the macro and + ;; once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of + ;; the prolog inside a method without a frame. But given that this is only debug code and generally we + ;; shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier + ;; variants to set up frames. Unlike RhpBulkWriteBarrier below which is treated as a helper call using the + ;; usual calling convention, the compiler knows exactly which registers are trashed in the simple write + ;; barrier case, so we don't have any more scratch registers to play with (and doing so would only make + ;; things harder if at a later stage we want to allow multiple barrier versions based on the input + ;; registers). + push DESTREG + + ;; Transform DESTREG into the equivalent address in the shadow heap. + sub DESTREG, g_lowest_address + jb &BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG& + add DESTREG, [g_GCShadow] + cmp DESTREG, [g_GCShadowEnd] + ja &BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG& + + ;; Update the shadow heap. + mov [DESTREG], REFREG + + ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. This + ;; read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to + ;; recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock + ;; prefix). 
+ xchg [rsp], DESTREG + cmp [DESTREG], REFREG + jne &BASENAME&_UpdateShadowHeap_Invalidate_&REFREG& + + ;; The original DESTREG value is now restored but the stack has a value (the shadow version of the + ;; location) pushed. Need to discard this push before we are done. + add rsp, 8 + jmp &BASENAME&_UpdateShadowHeap_Done_&REFREG& + +&BASENAME&_UpdateShadowHeap_Invalidate_&REFREG&: + ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't + ;; guarantee whose shadow update won. + + ;; Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an + ;; additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg + ;; variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 64-bit + ;; immediate and therefore must be moved into a register before it can be written to the shadow + ;; location. + xchg [rsp], DESTREG + push REFREG + mov REFREG, INVALIDGCVALUE + mov qword ptr [DESTREG], REFREG + pop REFREG + +&BASENAME&_UpdateShadowHeap_PopThenDone_&REFREG&: + ;; Restore original DESTREG value from the stack. + pop DESTREG + +&BASENAME&_UpdateShadowHeap_Done_&REFREG&: +endm + +else ; WRITE_BARRIER_CHECK + +UPDATE_GC_SHADOW macro BASENAME, REFREG, DESTREG +endm + +endif ; WRITE_BARRIER_CHECK + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +;; name of the register that points to the location to be updated and the name of the register that holds the +;; object reference (this should be in upper case as it's used in the definition of the name of the helper). +DEFINE_UNCHECKED_WRITE_BARRIER_CORE macro BASENAME, REFREG + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW BASENAME, REFREG, rcx + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + cmp REFREG, [g_ephemeral_low] + jb &BASENAME&_NoBarrierRequired_&REFREG& + cmp REFREG, [g_ephemeral_high] + jae &BASENAME&_NoBarrierRequired_&REFREG& + + ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must + ;; track this write. The location address is translated into an offset in the card table bitmap. We set + ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + ;; the byte if it hasn't already been done since writes are expensive and impact scaling. + shr rcx, 11 + add rcx, [g_card_table] + cmp byte ptr [rcx], 0FFh + jne &BASENAME&_UpdateCardTable_&REFREG& + +&BASENAME&_NoBarrierRequired_&REFREG&: + ret + +;; We get here if it's necessary to update the card table. +&BASENAME&_UpdateCardTable_&REFREG&: + mov byte ptr [rcx], 0FFh + ret + +endm + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. One argument is taken, the +;; name of the register that will hold the object reference (this should be in upper case as it's used in the +;; definition of the name of the helper). 
+DEFINE_UNCHECKED_WRITE_BARRIER macro REFREG, EXPORT_REG_NAME + +;; Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard +;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that +;; location is in one of the other general registers determined by the value of REFREG. + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpAssignRef&EXPORT_REG_NAME&, _TEXT + + ;; Export the canonical write barrier under unqualified name as well + ifidni , + ALTERNATE_ENTRY RhpAssignRef + ALTERNATE_ENTRY RhpAssignRefAVLocation + endif + + ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here + ;; and the card table update we may perform below. + mov qword ptr [rcx], REFREG + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, REFREG + +LEAF_END RhpAssignRef&EXPORT_REG_NAME&, _TEXT +endm + +;; One day we might have write barriers for all the possible argument registers but for now we have +;; just one write barrier that assumes the input register is RDX. +DEFINE_UNCHECKED_WRITE_BARRIER RDX, EDX + +;; +;; Define the helpers used to implement the write barrier required when writing an object reference into a +;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +;; collection. +;; + +DEFINE_CHECKED_WRITE_BARRIER_CORE macro BASENAME, REFREG + + ;; The location being updated might not even lie in the GC heap (a handle or stack location for instance), + ;; in which case no write barrier is required. + cmp rcx, [g_lowest_address] + jb &BASENAME&_NoBarrierRequired_&REFREG& + cmp rcx, [g_highest_address] + jae &BASENAME&_NoBarrierRequired_&REFREG& + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG + +endm + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. One argument is taken, the +;; name of the register that will hold the object reference (this should be in upper case as it's used in the +;; definition of the name of the helper). +DEFINE_CHECKED_WRITE_BARRIER macro REFREG, EXPORT_REG_NAME + +;; Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard +;; decoration). The location to be updated is always in RCX. The object reference that will be assigned into +;; that location is in one of the other general registers determined by the value of REFREG. + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedAssignRef&EXPORT_REG_NAME&, _TEXT + + ;; Export the canonical write barrier under unqualified name as well + ifidni , + ALTERNATE_ENTRY RhpCheckedAssignRef + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + endif + + ;; Write the reference into the location. 
Note that we rely on the fact that no GC can occur between here + ;; and the card table update we may perform below. + mov qword ptr [rcx], REFREG + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, REFREG + +LEAF_END RhpCheckedAssignRef&EXPORT_REG_NAME&, _TEXT +endm + +;; One day we might have write barriers for all the possible argument registers but for now we have +;; just one write barrier that assumes the input register is RDX. +DEFINE_CHECKED_WRITE_BARRIER RDX, EDX + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT + mov rax, r8 +ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + lock cmpxchg [rcx], rdx + jne RhpCheckedLockCmpXchg_NoBarrierRequired_RDX + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, RDX + +LEAF_END RhpCheckedLockCmpXchg, _TEXT + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedXchg, _TEXT + + ;; Setup rax with the new object for the exchange, that way it will automatically hold the correct result + ;; afterwards and we can leave rdx unaltered ready for the GC write barrier below. + mov rax, rdx +ALTERNATE_ENTRY RhpCheckedXchgAVLocation + xchg [rcx], rax + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, RDX + +LEAF_END RhpCheckedXchg, _TEXT + +;; +;; RhpByRefAssignRef simulates movs instruction for object references. +;; +;; On entry: +;; rdi: address of ref-field (assigned to) +;; rsi: address of the data (source) +;; rcx: be trashed +;; +;; On exit: +;; rdi, rsi are incremented by 8, +;; rcx: trashed +;; +LEAF_ENTRY RhpByRefAssignRef, _TEXT + mov rcx, [rsi] + mov [rdi], rcx + + ;; Check whether the writes were even into the heap. If not there's no card update required. + cmp rdi, [g_lowest_address] + jb RhpByRefAssignRef_NotInHeap + cmp rdi, [g_highest_address] + jae RhpByRefAssignRef_NotInHeap + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW BASENAME, rcx, rdi + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + cmp rcx, [g_ephemeral_low] + jb RhpByRefAssignRef_NotInHeap + cmp rcx, [g_ephemeral_high] + jae RhpByRefAssignRef_NotInHeap + + ;; move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 8h + add rdi, 8h + + ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must + ;; track this write. The location address is translated into an offset in the card table bitmap. We set + ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + ;; the byte if it hasn't already been done since writes are expensive and impact scaling. 
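+    ;; (Each card table byte covers 2^11 == 2KB of heap addresses, hence the shift by 11 below.)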
+ shr rcx, 11 + add rcx, [g_card_table] + cmp byte ptr [rcx], 0FFh + jne RhpByRefAssignRef_UpdateCardTable + ret + +;; We get here if it's necessary to update the card table. +RhpByRefAssignRef_UpdateCardTable: + mov byte ptr [rcx], 0FFh + ret + +RhpByRefAssignRef_NotInHeap: + ; Increment the pointers before leaving + add rdi, 8h + add rsi, 8h + ret +LEAF_END RhpByRefAssignRef, _TEXT + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.S b/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.S new file mode 100644 index 0000000000000..e72a3ef105b66 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.S @@ -0,0 +1,555 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// r0 == EEType +LEAF_ENTRY RhpNewFast, _TEXT + PROLOG_PUSH "{r4,lr}" + mov r4, r0 // save EEType + + // r0 = GetThread() + INLINE_GETTHREAD + + // r4 contains EEType pointer + ldr r2, [r4, #OFFSETOF__EEType__m_uBaseSize] + + // r0: Thread pointer + // r4: EEType pointer + // r2: base size + + ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + add r2, r3 + ldr r1, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r1 + bhi LOCAL_LABEL(RhpNewFast_RarePath) + + // set the new alloc pointer + str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new object's EEType pointer + str r4, [r3, #OFFSETOF__Object__m_pEEType] + + mov r0, r3 + + EPILOG_POP "{r4,pc}" + +LOCAL_LABEL(RhpNewFast_RarePath): + mov r0, r4 // restore EEType + mov r1, #0 + EPILOG_POP "{r4,lr}" + b C_FUNC(RhpNewObject) + +LEAF_END RhpNewFast, _TEXT + +// Allocate non-array object with finalizer. +// r0 == EEType +// +LEAF_ENTRY RhpNewFinalizable, _TEXT + mov r1, #GC_ALLOC_FINALIZE + b C_FUNC(RhpNewObject) +LEAF_END RhpNewFinalizable, _TEXT + + +// Allocate non-array object. +// r0 == EEType +// r1 == alloc flags +NESTED_ENTRY RhpNewObject, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME r3 + + // r0: EEType + // r1: alloc flags + // r3: transition frame + + // Preserve the EEType in r5. + mov r5, r0 + + ldr r2, [r0, #OFFSETOF__EEType__m_uBaseSize] // cbSize + + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx C_FUNC(RhpGcAlloc) + + // Set the new object's EEType pointer on success. + cbz r0, LOCAL_LABEL(NewOutOfMemory) + str r5, [r0, #OFFSETOF__Object__m_pEEType] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + ldr r1, [r5, #OFFSETOF__EEType__m_uBaseSize] + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r1, r2 + blo LOCAL_LABEL(New_SkipPublish) + + // r0: already contains object + // r1: already contains object size + + bl C_FUNC(RhpPublishObject) + // r0: function returned the passed-in object + +LOCAL_LABEL(New_SkipPublish): + + POP_COOP_PINVOKE_FRAME + bx lr + +LOCAL_LABEL(NewOutOfMemory): + // This is the OOM failure path. We're going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov r0, r5 // EEType pointer + mov r1, #0 // Indicate that we should throw OOM. 
+ + POP_COOP_PINVOKE_FRAME + + b C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhpNewObject, _TEXT + + +// Allocate a string. +// r0 == EEType +// r1 == element/character count +LEAF_ENTRY RhNewString, _TEXT + PROLOG_PUSH "{r4-r6,lr}" + // Make sure computing the overall allocation size won't overflow + MOV32 r12, MAX_STRING_LENGTH + cmp r1, r12 + bhi LOCAL_LABEL(StringSizeOverflow) + + // Compute overall allocation size (align(base size + (element size * elements), 4)). + mov r2, #(STRING_BASE_SIZE + 3) +#if STRING_COMPONENT_SIZE == 2 + add r2, r2, r1, lsl #1 // r2 += characters * 2 +#else + NotImplementedComponentSize +#endif + bic r2, r2, #3 + + mov r4, r0 // Save EEType + mov r5, r1 // Save element count + mov r6, r2 // Save string size + // r0 = GetThread() + INLINE_GETTHREAD + // r4 == EEType + // r5 == element count + // r6 == string size + // r0 == Thread* + + // Load potential new object address into r12. + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + adds r6, r12 + bcs LOCAL_LABEL(RhNewString_RarePath) // if we get a carry here, the string is too large to fit below 4 GB + + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r6, r12 + bhi LOCAL_LABEL(RhNewString_RarePath) + + // Reload new object address into r12. + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Update the alloc pointer to account for the allocation. + str r6, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new object's EEType pointer and element count. + str r4, [r12, #OFFSETOF__Object__m_pEEType] + str r5, [r12, #OFFSETOF__String__m_Length] + + // Return the object allocated in r0. + mov r0, r12 + EPILOG_POP "{r4-r6,pc}" + +LOCAL_LABEL(StringSizeOverflow): + // We get here if the size of the final string object can't be represented as an unsigned + // 32-bit value. We're going to tail-call to a managed helper that will throw + // an OOM exception that the caller of this allocator understands. + + // EEType is in r0 already + mov r1, 0 // Indicate that we should throw OOM + EPILOG_POP "{r4-r6,lr}" + b C_FUNC(RhExceptionHandling_FailedAllocation) + +LOCAL_LABEL(RhNewString_RarePath): + mov r3, r0 + mov r0, r4 + mov r1, r5 + mov r2, r6 + // r0 == EEType + // r1 == element count + // r2 == string size + Thread::m_alloc_context::alloc_ptr + // r3 == Thread + EPILOG_POP "{r4-r6,lr}" + b C_FUNC(RhpNewArrayRare) + +LEAF_END RhNewString, _TEXT + + +// Allocate one dimensional, zero based array (SZARRAY). +// r0 == EEType +// r1 == element count +LEAF_ENTRY RhpNewArray, _TEXT + PROLOG_PUSH "{r4-r6,lr}" + + // Compute overall allocation size (align(base size + (element size * elements), 4)). + // if the element count is <= 0x10000, no overflow is possible because the component + // size is <= 0xffff (it's an unsigned 16-bit value) and thus the product is <= 0xffff0000 + // and the base size for the worst case (32 dimensional MdArray) is less than 0xffff. 
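+    // In C terms the size computed below is roughly (illustrative sketch):
+    //   size = (pEEType->m_uBaseSize + count * pEEType->m_usComponentSize + 3) & ~3;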
+ ldrh r2, [r0, #OFFSETOF__EEType__m_usComponentSize] + cmp r1, #0x10000 + bhi LOCAL_LABEL(ArraySizeBig) + umull r2, r3, r2, r1 + ldr r3, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r3 + adds r2, #3 +LOCAL_LABEL(ArrayAlignSize): + bic r2, r2, #3 + + mov r4, r0 // Save EEType + mov r5, r1 // Save element count + mov r6, r2 // Save array size + // r0 = GetThread() + INLINE_GETTHREAD + // r4 == EEType + // r5 == element count + // r6 == array size + // r0 == Thread* + + // Load potential new object address into r12. + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + adds r6, r12 + bcs LOCAL_LABEL(RhpNewArray_RarePath) // if we get a carry here, the array is too large to fit below 4 GB + + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r6, r12 + bhi LOCAL_LABEL(RhpNewArray_RarePath) + + // Reload new object address into r12. + ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Update the alloc pointer to account for the allocation. + str r6, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new object's EEType pointer and element count. + str r4, [r12, #OFFSETOF__Object__m_pEEType] + str r5, [r12, #OFFSETOF__Array__m_Length] + + // Return the object allocated in r0. + mov r0, r12 + EPILOG_POP "{r4-r6,pc}" + +LOCAL_LABEL(ArraySizeBig): + // if the element count is negative, it's an overflow error + cmp r1, #0 + blt LOCAL_LABEL(ArraySizeOverflow) + + // now we know the element count is in the signed int range [0..0x7fffffff] + // overflow in computing the total size of the array size gives an out of memory exception, + // NOT an overflow exception + // we already have the component size in r2 + umull r2, r3, r2, r1 + cbnz r3, LOCAL_LABEL(ArrayOutOfMemoryFinal) + ldr r3, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r3 + bcs LOCAL_LABEL(ArrayOutOfMemoryFinal) + adds r2, #3 + bcs LOCAL_LABEL(ArrayOutOfMemoryFinal) + b LOCAL_LABEL(ArrayAlignSize) + +LOCAL_LABEL(ArrayOutOfMemoryFinal): + + // EEType is in r0 already + mov r1, #0 // Indicate that we should throw OOM. + EPILOG_POP "{r4-r6,lr}" + b C_FUNC(RhExceptionHandling_FailedAllocation) + +LOCAL_LABEL(ArraySizeOverflow): + // We get here if the size of the final array object can't be represented as an unsigned + // 32-bit value. We're going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // EEType is in r0 already + mov r1, #1 // Indicate that we should throw OverflowException + EPILOG_POP "{r4-r6,lr}" + b C_FUNC(RhExceptionHandling_FailedAllocation) + +LOCAL_LABEL(RhpNewArray_RarePath): + mov r3, r0 + mov r0, r4 + mov r1, r5 + mov r2, r6 + // r0 == EEType + // r1 == element count + // r2 == array size + Thread::m_alloc_context::alloc_ptr + // r3 == Thread + EPILOG_POP "{r4-r6,lr}" + b C_FUNC(RhpNewArrayRare) + +LEAF_END RhpNewArray, _TEXT + + +// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. +// r0 == EEType +// r1 == element count +// r2 == array size + Thread::m_alloc_context::alloc_ptr +// r3 == Thread +NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler + + // Recover array size by subtracting the alloc_ptr from r2. 
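+    // (On entry r2 == array size + alloc_ptr, so subtracting alloc_ptr yields the cbSize passed to RhpGcAlloc.)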
+ ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + sub r2, r12 + + PUSH_COOP_PINVOKE_FRAME r3 + + // Preserve the EEType in r5 and element count in r6. + mov r5, r0 + mov r6, r1 + + mov r7, r2 // Save array size in r7 + + mov r1, #0 // uFlags + + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx C_FUNC(RhpGcAlloc) + + // Test for failure (NULL return). + cbz r0, LOCAL_LABEL(ArrayOutOfMemory) + + // Success, set the array's type and element count in the new object. + str r5, [r0, #OFFSETOF__Object__m_pEEType] + str r6, [r0, #OFFSETOF__Array__m_Length] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r7, r2 + blo LOCAL_LABEL(NewArray_SkipPublish) + + // r0: already contains object + mov r1, r7 // r1: object size + bl C_FUNC(RhpPublishObject) + // r0: function returned the passed-in object + +LOCAL_LABEL(NewArray_SkipPublish): + + POP_COOP_PINVOKE_FRAME + bx lr + +LOCAL_LABEL(ArrayOutOfMemory): + + mov r0, r5 // EEType + mov r1, #0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + + b C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhpNewArrayRare, _TEXT + +// Allocate simple object (not finalizable, array or value type) on an 8 byte boundary. +// r0 == EEType +LEAF_ENTRY RhpNewFastAlign8, _TEXT + PROLOG_PUSH "{r4,lr}" + + mov r4, r0 // save EEType + + // r0 = GetThread() + INLINE_GETTHREAD + + // Fetch object size into r2. + ldr r2, [r4, #OFFSETOF__EEType__m_uBaseSize] + + // r4: EEType pointer + // r0: Thread pointer + // r2: base size + + // Load potential new object address into r3. Cache this result in r12 as well for the common case + // where the allocation succeeds (r3 will be overwritten in the following bounds check). + ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov r12, r3 + + // Check whether the current allocation context is already aligned for us. + tst r3, #0x7 + bne LOCAL_LABEL(Alloc8Failed) + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi LOCAL_LABEL(Alloc8Failed) + + // Update the alloc pointer to account for the allocation. + str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new object's EEType pointer. + str r4, [r12, #OFFSETOF__Object__m_pEEType] + + // Return the object allocated in r0. + mov r0, r12 + + EPILOG_POP "{r4,pc}" + +LOCAL_LABEL(Alloc8Failed): + // Fast allocation failed. Call slow helper with flags set to indicate an 8-byte alignment and no + // finalization. + mov r0, r4 // restore EEType + mov r1, #GC_ALLOC_ALIGN8 + EPILOG_POP "{r4,lr}" + b C_FUNC(RhpNewObject) + +LEAF_END RhpNewFastAlign8, _TEXT + +// Allocate a finalizable object (by definition not an array or value type) on an 8 byte boundary. +// r0 == EEType +LEAF_ENTRY RhpNewFinalizableAlign8, _TEXT + mov r1, #(GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8) + b C_FUNC(RhpNewObject) +LEAF_END RhpNewFinalizableAlign8, _TEXT + +// Allocate a value type object (i.e. box it) on an 8 byte boundary + 4 (so that the value type payload +// itself is 8 byte aligned). 
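+// Layout sketch (assuming the usual 4-byte EEType* object header on 32-bit ARM):
+// an object starting at an address that is 4 mod 8 places the value type
+// payload that follows the EEType* on an 8-byte boundary.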
+// r0 == EEType +LEAF_ENTRY RhpNewFastMisalign, _TEXT + PROLOG_PUSH "{r4,lr}" + + mov r4, r0 // save EEType + + // r0 = GetThread() + INLINE_GETTHREAD + + // Fetch object size into r2. + ldr r2, [r4, #OFFSETOF__EEType__m_uBaseSize] + + // r4: EEType pointer + // r0: Thread pointer + // r2: base size + + // Load potential new object address into r3. Cache this result in r12 as well for the common case + // where the allocation succeeds (r3 will be overwritten in the following bounds check). + ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov r12, r3 + + // Check whether the current allocation context is already aligned for us (for boxing that means the + // address % 8 == 4, so the value type payload following the EEType* is actually 8-byte aligned). + tst r3, #0x7 + beq LOCAL_LABEL(BoxAlloc8Failed) + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi LOCAL_LABEL(BoxAlloc8Failed) + + // Update the alloc pointer to account for the allocation. + str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new object's EEType pointer. + str r4, [r12, #OFFSETOF__Object__m_pEEType] + + // Return the object allocated in r0. + mov r0, r12 + + EPILOG_POP "{r4,pc}" + +LOCAL_LABEL(BoxAlloc8Failed): + // Fast allocation failed. Call slow helper with flags set to indicate an 8+4 byte alignment and no + // finalization. + mov r0, r4 // restore EEType + mov r1, #(GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS) + EPILOG_POP "{r4,lr}" + b C_FUNC(RhpNewObject) + +LEAF_END RhpNewFastMisalign, _TEXT + +// Allocate an array on an 8 byte boundary. +// r0 == EEType +// r1 == element count +NESTED_ENTRY RhpNewArrayAlign8, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME r3 + + // Compute overall allocation size (base size + align((element size * elements), 4)). + ldrh r2, [r0, #OFFSETOF__EEType__m_usComponentSize] + umull r2, r4, r2, r1 + cbnz r4, LOCAL_LABEL(Array8SizeOverflow) + adds r2, #3 + bcs LOCAL_LABEL(Array8SizeOverflow) + bic r2, r2, #3 + ldr r4, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r4 + bcs LOCAL_LABEL(Array8SizeOverflow) + + // Preserve the EEType in r5 and element count in r6. + mov r5, r0 + mov r6, r1 + mov r7, r2 // Save array size in r7 + + mov r1, #GC_ALLOC_ALIGN8 // uFlags + + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx C_FUNC(RhpGcAlloc) + + // Test for failure (NULL return). + cbz r0, LOCAL_LABEL(Array8OutOfMemory) + + // Success, set the array's type and element count in the new object. + str r5, [r0, #OFFSETOF__Object__m_pEEType] + str r6, [r0, #OFFSETOF__Array__m_Length] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r7, r2 + blo LOCAL_LABEL(NewArray8_SkipPublish) + + // r0: already contains object + mov r1, r7 // r1: object size + bl C_FUNC(RhpPublishObject) + // r0: function returned the passed-in object +LOCAL_LABEL(NewArray8_SkipPublish): + + POP_COOP_PINVOKE_FRAME + + bx lr + +LOCAL_LABEL(Array8SizeOverflow): + // We get here if the size of the final array object can't be represented as an unsigned + // 32-bit value. 
We're going to tail-call to a managed helper that will throw + // an OOM or overflow exception that the caller of this allocator understands. + + // if the element count is non-negative, it's an OOM error + cmp r1, #0 + bge LOCAL_LABEL(Array8OutOfMemory1) + + // r0 holds EEType pointer already + mov r1, #1 // Indicate that we should throw OverflowException + + POP_COOP_PINVOKE_FRAME + b C_FUNC(RhExceptionHandling_FailedAllocation) + +LOCAL_LABEL(Array8OutOfMemory): + // This is the OOM failure path. We're going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov r0, r5 // EEType pointer + +LOCAL_LABEL(Array8OutOfMemory1): + + mov r1, #0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + b C_FUNC(RhExceptionHandling_FailedAllocation) + +NESTED_END RhpNewArrayAlign8, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.asm b/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.asm new file mode 100644 index 0000000000000..d459ef8fa6d8d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/AllocFast.asm @@ -0,0 +1,578 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +;; allocation context then automatically fallback to the slow allocation path. +;; r0 == EEType + LEAF_ENTRY RhpNewFast + + ;; r1 = GetThread(), TRASHES r2 + INLINE_GETTHREAD r1, r2 + + ;; Fetch object size into r2. + ldr r2, [r0, #OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; r0: EEType pointer + ;; r1: Thread pointer + ;; r2: base size + ;; + + ;; Load potential new object address into r3. Cache this result in r12 as well for the common case + ;; where the allocation succeeds (r3 will be overwritten in the following bounds check). + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov r12, r3 + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi AllocFailed + + ;; Update the alloc pointer to account for the allocation. + str r2, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer. + str r0, [r12, #OFFSETOF__Object__m_pEEType] + + ;; Return the object allocated in r0. + mov r0, r12 + + bx lr + +AllocFailed + ;; Fast allocation failed. Call slow helper with flags set to zero (this isn't a finalizable object). + mov r1, #0 + b RhpNewObject + + LEAF_END RhpNewFast + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate non-array object with finalizer. +;; r0 == EEType + LEAF_ENTRY RhpNewFinalizable + mov r1, #GC_ALLOC_FINALIZE + b RhpNewObject + LEAF_END RhpNewFinalizable + +;; Allocate non-array object. +;; r0 == EEType +;; r1 == alloc flags + NESTED_ENTRY RhpNewObject + + PUSH_COOP_PINVOKE_FRAME r3 + + ; r0: EEType + ; r1: alloc flags + ; r3: transition frame + + ;; Preserve the EEType in r5. + mov r5, r0 + + ldr r2, [r0, #OFFSETOF__EEType__m_uBaseSize] ; cbSize + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx RhpGcAlloc + + ;; Set the new object's EEType pointer on success. 
+ cbz r0, NewOutOfMemory + str r5, [r0, #OFFSETOF__Object__m_pEEType] + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + ldr r1, [r5, #OFFSETOF__EEType__m_uBaseSize] + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r1, r2 + blo New_SkipPublish + ;; r0: already contains object + ;; r1: already contains object size + bl RhpPublishObject + ;; r0: function returned the passed-in object +New_SkipPublish + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NewOutOfMemory + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov r0, r5 ; EEType pointer + mov r1, #0 ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + EPILOG_BRANCH RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewObject + + +;; Allocate a string. +;; r0 == EEType +;; r1 == element/character count + LEAF_ENTRY RhNewString + + ; Make sure computing the overall allocation size won't overflow + MOV32 r2, MAX_STRING_LENGTH + cmp r1, r2 + bhs StringSizeOverflow + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + mov r2, #(STRING_BASE_SIZE + 3) +#if STRING_COMPONENT_SIZE == 2 + add r2, r2, r1, lsl #1 ; r2 += characters * 2 +#else + NotImplementedComponentSize +#endif + bic r2, r2, #3 + + ; r0 == EEType + ; r1 == element count + ; r2 == string size + + INLINE_GETTHREAD r3, r12 + + ;; Load potential new object address into r12. + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + adds r2, r12 + bcs RhpNewArrayRare ; if we get a carry here, the array is too large to fit below 4 GB + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r12 + bhi RhpNewArrayRare + + ;; Reload new object address into r12. + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Update the alloc pointer to account for the allocation. + str r2, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer and element count. + str r0, [r12, #OFFSETOF__Object__m_pEEType] + str r1, [r12, #OFFSETOF__String__m_Length] + + ;; Return the object allocated in r0. + mov r0, r12 + + bx lr + +StringSizeOverflow + ; We get here if the size of the final string object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an OOM exception that the caller of this allocator understands. + + ; r0 holds EEType pointer already + mov r1, #0 ; Indicate that we should throw OOM. + b RhExceptionHandling_FailedAllocation + + LEAF_END RhNewString + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate one dimensional, zero based array (SZARRAY). +;; r0 == EEType +;; r1 == element count + LEAF_ENTRY RhpNewArray + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + ; if the element count is <= 0x10000, no overflow is possible because the component + ; size is <= 0xffff (it's an unsigned 16-bit value) and thus the product is <= 0xffff0000 + ; and the base size for the worst case (32 dimensional MdArray) is less than 0xffff. 
+ ldrh r2, [r0, #OFFSETOF__EEType__m_usComponentSize] + cmp r1, #0x10000 + bhi ArraySizeBig + umull r2, r3, r2, r1 + ldr r3, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r3 + adds r2, #3 +ArrayAlignSize + bic r2, r2, #3 + + ; r0 == EEType + ; r1 == element count + ; r2 == array size + + INLINE_GETTHREAD r3, r12 + + ;; Load potential new object address into r12. + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + adds r2, r12 + bcs RhpNewArrayRare ; if we get a carry here, the array is too large to fit below 4 GB + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r12 + bhi RhpNewArrayRare + + ;; Reload new object address into r12. + ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Update the alloc pointer to account for the allocation. + str r2, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer and element count. + str r0, [r12, #OFFSETOF__Object__m_pEEType] + str r1, [r12, #OFFSETOF__Array__m_Length] + + ;; Return the object allocated in r0. + mov r0, r12 + + bx lr + +ArraySizeOverflow + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. + + ; r0 holds EEType pointer already + mov r1, #1 ; Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + +ArraySizeBig + ; if the element count is negative, it's an overflow error + cmp r1, #0 + blt ArraySizeOverflow + ; now we know the element count is in the signed int range [0..0x7fffffff] + ; overflow in computing the total size of the array size gives an out of memory exception, + ; NOT an overflow exception + ; we already have the component size in r2 + umull r2, r3, r2, r1 + cbnz r3, ArrayOutOfMemoryFinal + ldr r3, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r3 + bcs ArrayOutOfMemoryFinal + adds r2, #3 + bcs ArrayOutOfMemoryFinal + b ArrayAlignSize + +ArrayOutOfMemoryFinal + ; r0 holds EEType pointer already + mov r1, #0 ; Indicate that we should throw OOM. + b RhExceptionHandling_FailedAllocation + + LEAF_END RhpNewArray + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. +;; r0 == EEType +;; r1 == element count +;; r2 == array size + Thread::m_alloc_context::alloc_ptr +;; r3 == Thread + NESTED_ENTRY RhpNewArrayRare + + ; Recover array size by subtracting the alloc_ptr from r2. + PROLOG_NOP ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + PROLOG_NOP sub r2, r12 + + PUSH_COOP_PINVOKE_FRAME r3 + + ; Preserve the EEType in r5 and element count in r6. + mov r5, r0 + mov r6, r1 + + mov r7, r2 ; Save array size in r7 + + mov r1, #0 ; uFlags + + ; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx RhpGcAlloc + + ; Test for failure (NULL return). + cbz r0, ArrayOutOfMemory + + ; Success, set the array's type and element count in the new object. 
+ str r5, [r0, #OFFSETOF__Object__m_pEEType] + str r6, [r0, #OFFSETOF__Array__m_Length] + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r7, r2 + blo NewArray_SkipPublish + ;; r0: already contains object + mov r1, r7 ;; r1: object size + bl RhpPublishObject + ;; r0: function returned the passed-in object +NewArray_SkipPublish + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +ArrayOutOfMemory + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov r0, r5 ;; EEType pointer + mov r1, #0 ;; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + EPILOG_BRANCH RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewArrayRare + +;; Allocate simple object (not finalizable, array or value type) on an 8 byte boundary. +;; r0 == EEType + LEAF_ENTRY RhpNewFastAlign8 + + ;; r1 = GetThread(), TRASHES r2 + INLINE_GETTHREAD r1, r2 + + ;; Fetch object size into r2. + ldr r2, [r0, #OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; r0: EEType pointer + ;; r1: Thread pointer + ;; r2: base size + ;; + + ;; Load potential new object address into r3. Cache this result in r12 as well for the common case + ;; where the allocation succeeds (r3 will be overwritten in the following bounds check). + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov r12, r3 + + ;; Check whether the current allocation context is already aligned for us. + tst r3, #0x7 + bne ContextMisaligned + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi Alloc8Failed + + ;; Update the alloc pointer to account for the allocation. + str r2, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer. + str r0, [r12, #OFFSETOF__Object__m_pEEType] + + ;; Return the object allocated in r0. + mov r0, r12 + + bx lr + +ContextMisaligned + ;; Allocation context is currently misaligned. We attempt to fix this by allocating a minimum sized + ;; free object (which is sized such that it "flips" the alignment to a good value). + + ;; Determine whether the end of both objects would lie outside of the current allocation context. If + ;; so, we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + add r2, #SIZEOF__MinObject + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi Alloc8Failed + + ;; Update the alloc pointer to account for the allocation. + str r2, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the free object's EEType pointer (it's the only field we need to set, a component count of zero + ;; is what we want). + ldr r2, =$G_FREE_OBJECT_EETYPE + ldr r2, [r2] + str r2, [r12, #OFFSETOF__Object__m_pEEType] + + ;; Set the new object's EEType pointer. + str r0, [r12, #(SIZEOF__MinObject + OFFSETOF__Object__m_pEEType)] + + ;; Return the object allocated in r0. + add r0, r12, #SIZEOF__MinObject + + bx lr + +Alloc8Failed + ;; Fast allocation failed. Call slow helper with flags set to indicate an 8-byte alignment and no + ;; finalization. 
+ mov r1, #GC_ALLOC_ALIGN8 + b RhpNewObject + + LEAF_END RhpNewFastAlign8 + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate a finalizable object (by definition not an array or value type) on an 8 byte boundary. +;; r0 == EEType + LEAF_ENTRY RhpNewFinalizableAlign8 + + mov r1, #(GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8) + b RhpNewObject + + LEAF_END RhpNewFinalizableAlign8 + +;; Allocate a value type object (i.e. box it) on an 8 byte boundary + 4 (so that the value type payload +;; itself is 8 byte aligned). +;; r0 == EEType + LEAF_ENTRY RhpNewFastMisalign + + ;; r1 = GetThread(), TRASHES r2 + INLINE_GETTHREAD r1, r2 + + ;; Fetch object size into r2. + ldr r2, [r0, #OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; r0: EEType pointer + ;; r1: Thread pointer + ;; r2: base size + ;; + + ;; Load potential new object address into r3. Cache this result in r12 as well for the common case + ;; where the allocation succeeds (r3 will be overwritten in the following bounds check). + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov r12, r3 + + ;; Check whether the current allocation context is already aligned for us (for boxing that means the + ;; address % 8 == 4, so the value type payload following the EEType* is actually 8-byte aligned). + tst r3, #0x7 + beq BoxContextMisaligned + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi BoxAlloc8Failed + + ;; Update the alloc pointer to account for the allocation. + str r2, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer. + str r0, [r12, #OFFSETOF__Object__m_pEEType] + + ;; Return the object allocated in r0. + mov r0, r12 + + bx lr + +BoxContextMisaligned + ;; Allocation context is currently misaligned. We attempt to fix this by allocating a minimum sized + ;; free object (which is sized such that it "flips" the alignment to a good value). + + ;; Determine whether the end of both objects would lie outside of the current allocation context. If + ;; so, we abandon the attempt to allocate the object directly and fall back to the slow helper. + add r2, r3 + add r2, #SIZEOF__MinObject + ldr r3, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r2, r3 + bhi BoxAlloc8Failed + + ;; Update the alloc pointer to account for the allocation. + str r2, [r1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the free object's EEType pointer (it's the only field we need to set, a component count of zero + ;; is what we want). + ldr r2, =$G_FREE_OBJECT_EETYPE + ldr r2, [r2] + str r2, [r12, #OFFSETOF__Object__m_pEEType] + + ;; Set the new object's EEType pointer. + str r0, [r12, #(SIZEOF__MinObject + OFFSETOF__Object__m_pEEType)] + + ;; Return the object allocated in r0. + add r0, r12, #SIZEOF__MinObject + + bx lr + +BoxAlloc8Failed + ;; Fast allocation failed. Call slow helper with flags set to indicate an 8+4 byte alignment and no + ;; finalization. + mov r1, #(GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS) + b RhpNewObject + + LEAF_END RhpNewFastMisalign + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate an array on an 8 byte boundary. +;; r0 == EEType +;; r1 == element count + NESTED_ENTRY RhpNewArrayAlign8 + + PUSH_COOP_PINVOKE_FRAME r3 + + ; Compute overall allocation size (base size + align((element size * elements), 4)). 
+ ldrh r2, [r0, #OFFSETOF__EEType__m_usComponentSize] + umull r2, r4, r2, r1 + cbnz r4, Array8SizeOverflow + adds r2, #3 + bcs Array8SizeOverflow + bic r2, r2, #3 + ldr r4, [r0, #OFFSETOF__EEType__m_uBaseSize] + adds r2, r4 + bcs Array8SizeOverflow + + ; Preserve the EEType in r5 and element count in r6. + mov r5, r0 + mov r6, r1 + mov r7, r2 ; Save array size in r7 + + mov r1, #GC_ALLOC_ALIGN8 ; uFlags + + ; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + blx RhpGcAlloc + + ; Test for failure (NULL return). + cbz r0, Array8OutOfMemory + + ; Success, set the array's type and element count in the new object. + str r5, [r0, #OFFSETOF__Object__m_pEEType] + str r6, [r0, #OFFSETOF__Array__m_Length] + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movw r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movt r2, #(RH_LARGE_OBJECT_SIZE >> 16) + cmp r7, r2 + blo NewArray8_SkipPublish + ;; r0: already contains object + mov r1, r7 ;; r1: object size + bl RhpPublishObject + ;; r0: function returned the passed-in object +NewArray8_SkipPublish + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +Array8SizeOverflow + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an OOM or overflow exception that the caller of this allocator understands. + + ; if the element count is non-negative, it's an OOM error + cmp r1, #0 + bge Array8OutOfMemory1 + + ; r0 holds EEType pointer already + mov r1, #1 ;; Indicate that we should throw OverflowException + + POP_COOP_PINVOKE_FRAME + EPILOG_BRANCH RhExceptionHandling_FailedAllocation + +Array8OutOfMemory + ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ; an out of memory exception that the caller of this allocator understands. + + mov r0, r5 ;; EEType pointer +Array8OutOfMemory1 + mov r1, #0 ;; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + EPILOG_BRANCH RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewArrayAlign8 + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/AsmMacros.h b/src/coreclr/src/nativeaot/Runtime/arm/AsmMacros.h new file mode 100644 index 0000000000000..8c61fb571194c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/AsmMacros.h @@ -0,0 +1,288 @@ +;; Licensed to the.NET Foundation under one or more agreements. +;; The.NET Foundation licenses this file to you under the MIT license. + +;; OS provided macros +#include +;; generated by the build from AsmOffsets.cpp +#include "AsmOffsets.inc" + +;; +;; CONSTANTS -- INTEGER +;; +TSF_Attached equ 0x01 +TSF_SuppressGcStress equ 0x08 +TSF_DoNotTriggerGc equ 0x10 +TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC equ 0x18 + +;; GC type flags +GC_ALLOC_FINALIZE equ 1 +GC_ALLOC_ALIGN8_BIAS equ 4 +GC_ALLOC_ALIGN8 equ 8 + +;; GC minimal sized object. We use this to switch between 4 and 8 byte alignment in the GC heap (see AllocFast.asm). +SIZEOF__MinObject equ 12 + ASSERT (SIZEOF__MinObject :MOD: 8) == 4 + +;; Note: these must match the defs in PInvokeTransitionFrameFlags +PTFF_SAVE_R4 equ 0x00000001 +PTFF_SAVE_R5 equ 0x00000002 +PTFF_SAVE_R6 equ 0x00000004 +PTFF_SAVE_R7 equ 0x00000008 +PTFF_SAVE_R8 equ 0x00000010 +PTFF_SAVE_R9 equ 0x00000020 +PTFF_SAVE_R10 equ 0x00000040 +PTFF_SAVE_ALL_PRESERVED equ 0x00000077 ;; NOTE: FP is not included in this set! 
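+;; (0x00000077 == PTFF_SAVE_R4 | R5 | R6 | R8 | R9 | R10, i.e. everything except R7, the frame pointer)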
+PTFF_SAVE_SP equ 0x00000100 +PTFF_SAVE_R0 equ 0x00000200 ;; R0 is saved if it contains a GC ref and we're in hijack handler +PTFF_SAVE_ALL_SCRATCH equ 0x00003e00 ;; R0-R3,LR (R12 is trashed by the helpers anyway, but LR is relevant for loop hijacking +PTFF_R0_IS_GCREF equ 0x00004000 ;; iff PTFF_SAVE_R0: set -> r0 is Object, clear -> r0 is scalar +PTFF_R0_IS_BYREF equ 0x00008000 ;; iff PTFF_SAVE_R0: set -> r0 is ByRef, clear -> r0 is Object or scalar +PTFF_THREAD_ABORT equ 0x00010000 ;; indicates that ThreadAbortException should be thrown when returning from the transition + +;; These must match the TrapThreadsFlags enum +TrapThreadsFlags_None equ 0 +TrapThreadsFlags_AbortInProgress equ 1 +TrapThreadsFlags_TrapThreads equ 2 + +;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +STATUS_REDHAWK_THREAD_ABORT equ 0x43 + +;; +;; Rename fields of nested structs +;; +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + + +__tls_array equ 0x2C ;; offsetof(TEB, ThreadLocalStoragePointer) + +;; +;; MACROS +;; + + GBLS __SECTIONREL_tls_CurrentThread +__SECTIONREL_tls_CurrentThread SETS "SECTIONREL_tls_CurrentThread" + + MACRO + INLINE_GETTHREAD $destReg, $trashReg + EXTERN _tls_index + + ldr $destReg, =_tls_index + ldr $destReg, [$destReg] + mrc p15, 0, $trashReg, c13, c0, 2 + ldr $trashReg, [$trashReg, #__tls_array] + ldr $destReg, [$trashReg, $destReg, lsl #2] + ldr $trashReg, $__SECTIONREL_tls_CurrentThread + add $destReg, $trashReg + MEND + + ;; INLINE_GETTHREAD_CONSTANT_POOL macro has to be used after the last function in the .asm file that used + ;; INLINE_GETTHREAD. Optionally, it can be also used after any function that used INLINE_GETTHREAD + ;; to improve density, or to reduce distance betweeen the constant pool and its use. + MACRO + INLINE_GETTHREAD_CONSTANT_POOL + EXTERN tls_CurrentThread + +$__SECTIONREL_tls_CurrentThread + DCD tls_CurrentThread + RELOC 15 ;; SECREL + +__SECTIONREL_tls_CurrentThread SETS "$__SECTIONREL_tls_CurrentThread":CC:"_" + + MEND + + MACRO + INLINE_THREAD_UNHIJACK $threadReg, $trashReg1, $trashReg2 + ;; + ;; Thread::Unhijack() + ;; + ldr $trashReg1, [$threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz $trashReg1, %ft0 + + ldr $trashReg2, [$threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str $trashReg1, [$trashReg2] + mov $trashReg1, #0 + str $trashReg1, [$threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str $trashReg1, [$threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] +0 + MEND + +DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP + +;; +;; Macro used from unmanaged helpers called from managed code where the helper does not transition immediately +;; into pre-emptive mode but may cause a GC and thus requires the stack is crawlable. This is typically the +;; case for helpers that meddle in GC state (e.g. allocation helpers) where the code must remain in +;; cooperative mode since it handles object references and internal GC state directly but a garbage collection +;; may be inevitable. In these cases we need to be able to transition to pre-meptive mode deep within the +;; unmanaged code but still be able to initialize the stack iterator at the first stack frame which may hold +;; interesting GC references. 
In all our helper cases this corresponds to the most recent managed frame (e.g. +;; the helper's caller). +;; +;; This macro builds a frame describing the current state of managed code. +;; +;; INVARIANTS +;; - The macro assumes it defines the method prolog, it should typically be the first code in a method and +;; certainly appear before any attempt to alter the stack pointer. +;; - This macro uses trashReg (after its initial value has been saved in the frame) and upon exit trashReg +;; will contain the address of transition frame. +;; + MACRO + PUSH_COOP_PINVOKE_FRAME $trashReg + + PROLOG_STACK_ALLOC 4 ; Save space for caller's SP + PROLOG_PUSH {r4-r6,r8-r10} ; Save preserved registers + PROLOG_STACK_ALLOC 8 ; Save space for flags and Thread* + PROLOG_PUSH {r7} ; Save caller's FP + PROLOG_PUSH {r11,lr} ; Save caller's frame-chain pointer and PC + + ; Compute SP value at entry to this method and save it in the last slot of the frame (slot #11). + add $trashReg, sp, #(12 * 4) + str $trashReg, [sp, #(11 * 4)] + + ; Record the bitmask of saved registers in the frame (slot #4). + mov $trashReg, #DEFAULT_FRAME_SAVE_FLAGS + str $trashReg, [sp, #(4 * 4)] + + mov $trashReg, sp + MEND + +;; Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME + MACRO + POP_COOP_PINVOKE_FRAME + EPILOG_POP {r11,lr} ; Restore caller's frame-chain pointer and PC (return address) + EPILOG_POP {r7} ; Restore caller's FP + EPILOG_STACK_FREE 8 ; Discard flags and Thread* + EPILOG_POP {r4-r6,r8-r10} ; Restore preserved registers + EPILOG_STACK_FREE 4 ; Discard caller's SP + MEND + + +; Macro used to assign an alternate name to a symbol containing characters normally disallowed in a symbol +; name (e.g. C++ decorated names). + MACRO + SETALIAS $name, $symbol + GBLS $name +$name SETS "|$symbol|" + MEND + + + ; + ; Helper macro: create a global label for the given name, + ; decorate it, and export it for external consumption. + ; + + MACRO + __ExportLabelName $FuncName + + LCLS Name +Name SETS "|$FuncName|" + EXPORT $Name +$Name + MEND + + ; + ; Macro for indicating an alternate entry point into a function. + ; + + MACRO + LABELED_RETURN_ADDRESS $ReturnAddressName + + ; export the return address name, but do not perturb the code by forcing alignment + __ExportLabelName $ReturnAddressName + + ; flush any pending literal pool stuff + ROUT + + MEND + + MACRO + EXPORT_POINTER_TO_ADDRESS $Name + +1 + + AREA |.rdata|, ALIGN=4, DATA, READONLY + +$Name + + DCD %BT1 + + EXPORT $Name + + TEXTAREA + + ROUT + + MEND + +;----------------------------------------------------------------------------- +; Macro used to check (in debug builds only) whether the stack is 64-bit aligned (a requirement before calling +; out into C++/OS code). Invoke this directly after your prolog (if the stack frame size is fixed) or directly +; before a call (if you have a frame pointer and a dynamic stack). A breakpoint will be invoked if the stack +; is misaligned. 
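+; Roughly: push a scratch register, recompute the SP as it was at the point of
+; the check (sp + 4), and hit a breakpoint if that value is not 8-byte aligned.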
+; + MACRO + CHECK_STACK_ALIGNMENT + +#ifdef _DEBUG + push {r0} + add r0, sp, #4 + tst r0, #7 + pop {r0} + beq %F0 + EMIT_BREAKPOINT +0 +#endif + MEND + +;; Loads a 32bit constant into destination register + MACRO + MOV32 $destReg, $constant + + movw $destReg, #(($constant) & 0xFFFF) + movt $destReg, #(($constant) >> 16) + MEND + +;; +;; CONSTANTS -- SYMBOLS +;; + + SETALIAS G_LOWEST_ADDRESS, g_lowest_address + SETALIAS G_HIGHEST_ADDRESS, g_highest_address + SETALIAS G_EPHEMERAL_LOW, g_ephemeral_low + SETALIAS G_EPHEMERAL_HIGH, g_ephemeral_high + SETALIAS G_CARD_TABLE, g_card_table + SETALIAS G_FREE_OBJECT_EETYPE, ?g_pFreeObjectEEType@@3PAVEEType@@A +#ifdef FEATURE_GC_STRESS + SETALIAS THREAD__HIJACKFORGCSTRESS, ?HijackForGcStress@Thread@@SAXPAUPAL_LIMITED_CONTEXT@@@Z + SETALIAS REDHAWKGCINTERFACE__STRESSGC, ?StressGc@RedhawkGCInterface@@SAXXZ +#endif ;; FEATURE_GC_STRESS +;; +;; IMPORTS +;; + EXTERN RhpGcAlloc + EXTERN RhDebugBreak + EXTERN RhpWaitForSuspend2 + EXTERN RhpWaitForGC2 + EXTERN RhpReversePInvokeAttachOrTrapThread2 + EXTERN RhExceptionHandling_FailedAllocation + EXTERN RhpPublishObject + EXTERN RhpCalculateStackTraceWorker + + + EXTERN $G_LOWEST_ADDRESS + EXTERN $G_HIGHEST_ADDRESS + EXTERN $G_EPHEMERAL_LOW + EXTERN $G_EPHEMERAL_HIGH + EXTERN $G_CARD_TABLE + EXTERN RhpTrapThreads + EXTERN $G_FREE_OBJECT_EETYPE + + EXTERN RhThrowHwEx + EXTERN RhThrowEx + EXTERN RhRethrow + +#ifdef FEATURE_GC_STRESS + EXTERN $REDHAWKGCINTERFACE__STRESSGC + EXTERN $THREAD__HIJACKFORGCSTRESS +#endif ;; FEATURE_GC_STRESS diff --git a/src/coreclr/src/nativeaot/Runtime/arm/AsmOffsetsCpu.h b/src/coreclr/src/nativeaot/Runtime/arm/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..a8b3b9465a9f7 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/AsmOffsetsCpu.h @@ -0,0 +1,58 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. 
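+// For example, PLAT_ASM_OFFSET(4, PInvokeTransitionFrame, m_RIP) below asserts
+// that offsetof(PInvokeTransitionFrame, m_RIP) is 0x4 on this target.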
+// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(138, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(4, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(8, ExInfo, m_exception) +PLAT_ASM_OFFSET(0c, ExInfo, m_kind) +PLAT_ASM_OFFSET(0d, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(10, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(18, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(130, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(4, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(0c, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(14, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(118, StackFrameIterator) +PLAT_ASM_OFFSET(08, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(0c, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(114, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(70, PAL_LIMITED_CONTEXT) +PLAT_ASM_OFFSET(24, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, R0) +PLAT_ASM_OFFSET(4, PAL_LIMITED_CONTEXT, R4) +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, R5) +PLAT_ASM_OFFSET(0c, PAL_LIMITED_CONTEXT, R6) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, R7) +PLAT_ASM_OFFSET(14, PAL_LIMITED_CONTEXT, R8) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, R9) +PLAT_ASM_OFFSET(1c, PAL_LIMITED_CONTEXT, R10) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, R11) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, SP) +PLAT_ASM_OFFSET(2c, PAL_LIMITED_CONTEXT, LR) + +PLAT_ASM_SIZEOF(88, REGDISPLAY) +PLAT_ASM_OFFSET(38, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(10, REGDISPLAY, pR4) +PLAT_ASM_OFFSET(14, REGDISPLAY, pR5) +PLAT_ASM_OFFSET(18, REGDISPLAY, pR6) +PLAT_ASM_OFFSET(1c, REGDISPLAY, pR7) +PLAT_ASM_OFFSET(20, REGDISPLAY, pR8) +PLAT_ASM_OFFSET(24, REGDISPLAY, pR9) +PLAT_ASM_OFFSET(28, REGDISPLAY, pR10) +PLAT_ASM_OFFSET(2c, REGDISPLAY, pR11) +PLAT_ASM_OFFSET(48, REGDISPLAY, D) diff --git a/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.S b/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.S new file mode 100644 index 0000000000000..53184d9b28b2a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.S @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +// TODO: Implement Arm support + +NESTED_ENTRY RhCallDescrWorker, _TEXT, NoHandler +LOCAL_LABEL(ReturnFromCallDescrThunk): + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + // UNIXTODO: Implement this function + EMIT_BREAKPOINT +NESTED_END RhCallDescrWorker, _TEXT + diff --git a/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.asm b/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.asm new file mode 100644 index 0000000000000..55a1be033bbc4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/CallDescrWorker.asm @@ -0,0 +1,128 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;;----------------------------------------------------------------------------- +;; This helper routine enregisters the appropriate arguments and makes the +;; actual call. 
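+;; The CallDescrData fields referenced below, shown as an assumed C view derived
+;; from the OFFSETOF__CallDescrData__* symbols (illustrative only, field order
+;; not implied):
+;;
+;;   struct CallDescrData {
+;;       void*     pSrc;                     // stack arguments to copy
+;;       uint32_t  numStackSlots;
+;;       void*     pFloatArgumentRegisters;  // image of s0-s15, may be NULL
+;;       void*     pArgumentRegisters;       // image of r0-r3
+;;       void*     pTarget;                  // code address to call
+;;       uint32_t  fpReturnSize;             // 0, 4, 8, 16 or 32
+;;       void*     pReturnBuffer;            // where the return value is stored
+;;   };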
+;;----------------------------------------------------------------------------- +;;void RhCallDescrWorker(CallDescrData * pCallDescrData); + NESTED_ENTRY RhCallDescrWorker + PROLOG_PUSH {r4,r5,r7,lr} + PROLOG_STACK_SAVE r7 + + mov r5,r0 ; save pCallDescrData in r5 + + ldr r1, [r5,#OFFSETOF__CallDescrData__numStackSlots] + cbz r1, Ldonestack + + ;; Add frame padding to ensure frame size is a multiple of 8 (a requirement of the OS ABI). + ;; We push four registers (above) and numStackSlots arguments (below). If this comes to an odd number + ;; of slots we must pad with another. This simplifies to "if the low bit of numStackSlots is set, + ;; extend the stack another four bytes". + lsls r2, r1, #2 + and r3, r2, #4 + sub sp, sp, r3 + + ;; This loop copies numStackSlots words + ;; from [pSrcEnd-4,pSrcEnd-8,...] to [sp-4,sp-8,...] + ldr r0, [r5,#OFFSETOF__CallDescrData__pSrc] + add r0,r0,r2 +Lstackloop + ldr r2, [r0,#-4]! + str r2, [sp,#-4]! + subs r1, r1, #1 + bne Lstackloop +Ldonestack + + ;; If FP arguments are supplied in registers (r3 != NULL) then initialize all of them from the pointer + ;; given in r3. Do not use "it" since it faults in floating point even when the instruction is not executed. + ldr r3, [r5,#OFFSETOF__CallDescrData__pFloatArgumentRegisters] + cbz r3, LNoFloatingPoint + vldm r3, {s0-s15} +LNoFloatingPoint + + ;; Copy [pArgumentRegisters, ..., pArgumentRegisters + 12] + ;; into r0, ..., r3 + + ldr r4, [r5,#OFFSETOF__CallDescrData__pArgumentRegisters] + ldm r4, {r0-r3} + + CHECK_STACK_ALIGNMENT + + ;; call pTarget + ;; Note that remoting expect target in r4. + ldr r4, [r5,#OFFSETOF__CallDescrData__pTarget] + blx r4 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + ;; Symbol used to identify thunk call to managed function so the special + ;; case unwinder can unwind through this function. Sadly we cannot directly + ;; export this symbol right now because it confuses DIA unwinder to believe + ;; it's the beginning of a new method, therefore we export the address + ;; of an auxiliary variable holding the address instead. + + ldr r3, [r5,#OFFSETOF__CallDescrData__fpReturnSize] + + ;; Save FP return value if appropriate + cbz r3, LFloatingPointReturnDone + + ;; Float return case + ;; Do not use "it" since it faults in floating point even when the instruction is not executed. + cmp r3, #4 + bne LNoFloatReturn + vmov r0, s0 + b LFloatingPointReturnDone +LNoFloatReturn + + ;; Double return case + ;; Do not use "it" since it faults in floating point even when the instruction is not executed. 
+ cmp r3, #8 + bne LNoDoubleReturn + vmov r0, r1, s0, s1 + b LFloatingPointReturnDone +LNoDoubleReturn +; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + ldr r2, [r5, #OFFSETOF__CallDescrData__pReturnBuffer] + + cmp r3, #16 + bne LNoFloatHFAReturn + vstm r2, {s0-s3} + b LReturnDone +LNoFloatHFAReturn + + cmp r3, #32 + bne LNoDoubleHFAReturn + vstm r2, {d0-d3} + b LReturnDone +LNoDoubleHFAReturn + + EMIT_BREAKPOINT ; Unreachable + +LFloatingPointReturnDone + +; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + ldr r5, [r5, #OFFSETOF__CallDescrData__pReturnBuffer] + + ;; Save return value into retbuf + str r0, [r5, #(0)] + str r1, [r5, #(4)] + +LReturnDone + +#ifdef _DEBUG + ;; trash the floating point registers to ensure that the HFA return values + ;; won't survive by accident + vldm sp, {d0-d3} +#endif + + EPILOG_STACK_RESTORE r7 + EPILOG_POP {r4,r5,r7,pc} + + NESTED_END RhCallDescrWorker + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.S b/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.S new file mode 100644 index 0000000000000..3e216602b0c6b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.S @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement Arm support +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +// +// void CallingConventionConverter_ReturnVoidReturnThunk() +// +LEAF_ENTRY CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + bx lr +LEAF_END CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + +// +// int CallingConventionConverter_ReturnIntegerReturnThunk(int) +// +LEAF_ENTRY CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + // UNIXTODO: Implement this function + EMIT_BREAKPOINT +LEAF_END CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + +// +// __jmpstub__CallingConventionConverter_CommonCallingStub +// +// struct CallingConventionConverter_CommonCallingStub_PointerData +// { +// void *ManagedCallConverterThunk; +// void *UniversalThunk; +// } +// +// struct CommonCallingStubInputData +// { +// ULONG_PTR CallingConventionId; +// CallingConventionConverter_CommonCallingStub_PointerData *commonData; // Only the ManagedCallConverterThunk field is used +// // However, it is specified just like other platforms, so the behavior of the common +// // calling stub is easier to debug +// } +// +// sp-4 - Points at CommonCallingStubInputData +// +// +LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub + // UNIXTODO: Implement this function + EMIT_BREAKPOINT +LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub + +// +// void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonCallingStub) +// +LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + // UNIXTODO: Implement this function + EMIT_BREAKPOINT +LEAF_END CallingConventionConverter_GetStubs, _TEXT + +// +// void CallingConventionConverter_SpecifyCommonStubData(CallingConventionConverter_CommonCallingStub_PointerData *commonData); +// +LEAF_ENTRY CallingConventionConverter_SpecifyCommonStubData + // UNIXTODO: Implement this function + EMIT_BREAKPOINT +LEAF_END CallingConventionConverter_SpecifyCommonStubData diff --git 
a/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.asm b/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.asm new file mode 100644 index 0000000000000..0383182caf567 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/CallingConventionConverterHelpers.asm @@ -0,0 +1,88 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "kxarm.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + DATAAREA +UniversalThunkPointer % 4 + TEXTAREA + +OFFSETOF_CallingConventionId EQU 0 +OFFSETOF_commonData EQU 4 +OFFSETOF_ManagedCallConverterThunk EQU 0 +OFFSETOF_UniversalThunk EQU 4 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; CallingConventionCoverter Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; Note: The "__jmpstub__" prefix is used to indicate to debugger +;; that it must step-through this stub when it encounters it while +;; stepping. +;; + + ;; + ;; void CallingConventionConverter_ReturnThunk() + ;; + LEAF_ENTRY CallingConventionConverter_ReturnThunk + bx lr + LEAF_END CallingConventionConverter_ReturnThunk + + ;; + ;; __jmpstub__CallingConventionConverter_CommonCallingStub + ;; + ;; struct CallingConventionConverter_CommonCallingStub_PointerData + ;; { + ;; void *ManagedCallConverterThunk; + ;; void *UniversalThunk; + ;; } + ;; + ;; struct CommonCallingStubInputData + ;; { + ;; ULONG_PTR CallingConventionId; + ;; CallingConventionConverter_CommonCallingStub_PointerData *commonData; // Only the ManagedCallConverterThunk field is used + ;; // However, it is specified just like other platforms, so the behavior of the common + ;; // calling stub is easier to debug + ;; } + ;; + ;; sp-4 - Points at CommonCallingStubInputData + ;; + ;; + LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub + ldr r12, [sp, #-4] + ldr r12, [r12, #OFFSETOF_CallingConventionId] ; Get CallingConventionId into r12 + str r12, [sp, #-8] ; Put calling convention id into red zone + ldr r12, [sp, #-4] + ldr r12, [r12, #OFFSETOF_commonData] ; Get pointer to common data + ldr r12, [r12, #OFFSETOF_ManagedCallConverterThunk] ; Get pointer to managed call converter thunk + str r12, [sp, #-4] ; Put managed calling convention thunk pointer into red zone (overwrites pointer to CommonCallingStubInputData) + ldr r12, =UniversalThunkPointer + ldr r12, [r12] + bx r12 + LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub + + ;; + ;; void CallingConventionConverter_SpecifyCommonStubData(CallingConventionConverter_CommonCallingStub_PointerData *commonData); + ;; + LEAF_ENTRY CallingConventionConverter_SpecifyCommonStubData + ldr r1, [r0, #OFFSETOF_ManagedCallConverterThunk] ; Load ManagedCallConverterThunk into r1 {r1 = (CallingConventionConverter_CommonCallingStub_PointerData*)r0->ManagedCallConverterThunk } + ldr r2, [r0, #OFFSETOF_UniversalThunk] ; Load UniversalThunk into r2 {r2 = (CallingConventionConverter_CommonCallingStub_PointerData*)r0->UniversalThunk } + ldr r12, =UniversalThunkPointer + str r2, [r12] + bx lr + LEAF_END CallingConventionConverter_SpecifyCommonStubData + + ;; + ;; void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonCallingStub) + ;; + LEAF_ENTRY CallingConventionConverter_GetStubs + ldr r12, =CallingConventionConverter_ReturnThunk + str r12, [r0] ;; ARM doesn't need different return thunks. 
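Editor's note: for reference, the two structures described in the stub comments above, written out as C++. This mirrors the comment block rather than any header in this change, so treat it as illustrative.

#include <cstdint>

struct CallingConventionConverter_CommonCallingStub_PointerData
{
    void* ManagedCallConverterThunk;
    void* UniversalThunk;
};

struct CommonCallingStubInputData
{
    uintptr_t CallingConventionId;
    CallingConventionConverter_CommonCallingStub_PointerData* commonData;
};

// The common calling stub receives a pointer to CommonCallingStubInputData at sp-4,
// copies CallingConventionId to sp-8, overwrites sp-4 with ManagedCallConverterThunk,
// and then tail-calls the registered universal thunk.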
+ str r12, [r1] + ldr r12, =__jmpstub__CallingConventionConverter_CommonCallingStub + str r12, [r2] + bx lr + LEAF_END CallingConventionConverter_GetStubs + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/Dummies.asm b/src/coreclr/src/nativeaot/Runtime/arm/Dummies.asm new file mode 100644 index 0000000000000..ea6c21fc810d0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/Dummies.asm @@ -0,0 +1,18 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + LEAF_ENTRY RhpLMod + DCW 0xdefe + bx lr + LEAF_END RhpLMod + + LEAF_ENTRY RhpLMul + DCW 0xdefe + bx lr + LEAF_END RhpLMul + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.S b/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.S new file mode 100644 index 0000000000000..82a1d89d8df17 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.S @@ -0,0 +1,500 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 7)&(~7)) + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpThrowHwEx +// +// INPUT: R0: exception code of fault +// R1: faulting RIP +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler + + mov r2, r0 // save exception code into r2 + mov r0, sp // get SP of fault site + + mov lr, r1 // set IP of fault site + + // Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_VPUSH {d8-d15} + PROLOG_PUSH "{r0,lr}" // push {sp, pc} of fault site + PROLOG_PUSH "{r0,r4-r11,lr}" + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + // r0: SP of fault site + // r1: IP of fault site + // r2: exception code of fault + // lr: IP of fault site (as a 'return address') + mov r4, r2 // save exception code of fault + + // r0 = GetThread() + INLINE_GETTHREAD + + // r1 <- ExInfo* + add r1, sp, #rsp_offsetof_ExInfo + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + mov r3, #2 + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] // pExInfo->m_kind = ExKind.HardwareFault + + // link the ExInfo into the thread's ExInfo chain + ldr r3, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + mov r0, r4 // restore the exception code + // r0 contains the exception code + // r1 contains the address of the ExInfo + bl C_FUNC(RhThrowHwEx) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + // no return + EMIT_BREAKPOINT + +NESTED_END RhpThrowHwEx + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpThrowEx +// +// INPUT: R0: exception object +// +// OUTPUT: +// 
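Editor's note: the ExInfo initialization and chain push performed by RhpThrowHwEx above (and repeated, with a different kind, by the throw/rethrow helpers that follow) can be read as the C++ below. Struct shapes are simplified stand-ins; the ExKind values are taken from the asm comments.

#include <cstdint>

struct ExInfoSketch
{
    ExInfoSketch* m_pPrevExInfo;
    void*         m_pExContext;
    void*         m_exception;
    uint8_t       m_kind;
    uint8_t       m_passNumber;
    uint32_t      m_idxCurClause;
};

struct ThreadSketch
{
    ExInfoSketch* m_pExInfoStackHead;
};

void PushExInfoForHardwareFault(ThreadSketch* pThread, ExInfoSketch* pExInfo)
{
    pExInfo->m_exception    = nullptr;      // no managed exception object yet
    pExInfo->m_passNumber   = 1;            // first pass
    pExInfo->m_idxCurClause = 0xFFFFFFFF;   // MaxTryRegionIdx
    pExInfo->m_kind         = 2;            // ExKind.HardwareFault (per the asm comment)

    // link the new ExInfo at the head of the thread's chain
    pExInfo->m_pPrevExInfo      = pThread->m_pExInfoStackHead;
    pThread->m_pExInfoStackHead = pExInfo;
}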
+////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler + + // Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_VPUSH {d8-d15} + PROLOG_PUSH "{r0,lr}" // Reserve space for SP and store LR + PROLOG_PUSH "{r0,r4-r11,lr}" + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + // Calculate SP at callsite and save into the PAL_LIMITED_CONTEXT + add r4, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) + str r4, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__SP)] + + mov r4, r0 // Save exception object + // r0 = GetThread() + INLINE_GETTHREAD + + add r2, sp, #(rsp_offsetof_Context + SIZEOF__PAL_LIMITED_CONTEXT + 0x8) // r2 <- addr of return address + + // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + // address could have been hijacked when we were in that C# code and we must remove the hijack and + // reflect the correct return address in our exception context record. The other throw helpers don't + // need this because they cannot be tail-called from C#. + // NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + // where the tail-calling thread had saved LR, which may not match where we have saved LR. + + ldr r1, [r0, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz r1, LOCAL_LABEL(NotHiJacked) + + ldr r3, [r0, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + + // r4: exception object + // r1: hijacked return address + // r0: pThread + // r3: hijacked return address location + + add r12, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) // re-compute SP at callsite + cmp r3, r12 // if (m_ppvHijackedReturnAddressLocation < SP at callsite) + blo LOCAL_LABEL(TailCallWasHijacked) + + // normal case where a valid return address location is hijacked + str r1, [r3] + b LOCAL_LABEL(ClearThreadState) + +LOCAL_LABEL(TailCallWasHijacked): + + // Abnormal case where the return address location is now invalid because we ended up here via a tail + // call. In this case, our hijacked return address should be the correct caller of this method. + // + + // stick the previous return address in LR as well as in the right spots in our PAL_LIMITED_CONTEXT. 
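Editor's note: the hijack handling explained in the comment above reduces to a pointer comparison. Restated as C++ for readability (names are simplified stand-ins; callsiteSP is the SP value the asm recomputes).

#include <cstdint>

struct ThreadHijackSketch
{
    void** m_ppvHijackedReturnAddressLocation;
    void*  m_pvHijackedReturnAddress;
};

// Returns the IP/LR to record in the exception context (sketch).
void* UndoHijackForThrow(ThreadHijackSketch* pThread, void* callsiteSP, void* currentReturnAddress)
{
    void* hijacked = pThread->m_pvHijackedReturnAddress;
    if (hijacked == nullptr)
        return currentReturnAddress;                 // not hijacked, nothing to do

    void** location = pThread->m_ppvHijackedReturnAddressLocation;
    void*  resultIP = currentReturnAddress;

    if (location >= static_cast<void**>(callsiteSP))
        *location = hijacked;   // normal case: the slot is still live, put the address back
    else
        resultIP = hijacked;    // tail call: the slot is gone; the hijacked address is our real caller

    // clear the hijack state either way
    pThread->m_ppvHijackedReturnAddressLocation = nullptr;
    pThread->m_pvHijackedReturnAddress          = nullptr;
    return resultIP;
}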
+ mov lr, r1 + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__LR)] + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)] + +LOCAL_LABEL(ClearThreadState): + + // clear the Thread's hijack state + mov r3, #0 + str r3, [r0, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str r3, [r0, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +LOCAL_LABEL(NotHiJacked): + + add r1, sp, #rsp_offsetof_ExInfo // r1 <- ExInfo* + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] // init the exception object to null + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] // init to the first pass + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] // ExKind.Throw + + // link the ExInfo into the thread's ExInfo chain + ldr r3, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add r3, sp, #rsp_offsetof_Context // r3 <- PAL_LIMITED_CONTEXT* + str r3, [r1, #OFFSETOF__ExInfo__m_pExContext] // init ExInfo.m_pExContext + + mov r0, r4 // Restore exception object + // r0 contains the exception object + // r1 contains the address of the new ExInfo + bl C_FUNC(RhThrowEx) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + // no return + EMIT_BREAKPOINT + +NESTED_END RhpThrowEx, _TEXT + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void FASTCALL RhpRethrow() +// +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +// +// INPUT: +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpRethrow, _TEXT, NoHandler + + // Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_VPUSH {d8-d15} + PROLOG_PUSH "{r0,lr}" // Reserve space for SP and store LR + PROLOG_PUSH "{r0,r4-r11,lr}" + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + // Compute and save SP at callsite. 
+ add r1, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) + str r1, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__SP)] + + // r0 = GetThread(); + INLINE_GETTHREAD + + // r1 <- ExInfo* + add r1, sp, #rsp_offsetof_ExInfo + + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] // init the exception object to null + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] // init to a deterministic value (ExKind.None) + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] + + // link the ExInfo into the thread's ExInfo chain + ldr r3, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] // r3 <- currently active ExInfo + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add r2, sp, #rsp_offsetof_Context // r2 <- PAL_LIMITED_CONTEXT* + str r2, [r1, #OFFSETOF__ExInfo__m_pExContext] // init ExInfo.m_pExContext + + mov r0, r3 + // r0 contains the currently active ExInfo + // r1 contains the address of the new ExInfo + blx C_FUNC(RhRethrow) + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + // no return + EMIT_BREAKPOINT + +NESTED_END RhpRethrow, _TEXT + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, +// void* pHandlerIP, +// REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo) +// +// INPUT: R0: exception object +// R1: handler funclet address +// R2: REGDISPLAY* +// R3: ExInfo* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler + + PROLOG_PUSH "{r0,r2-r11,lr}" // r0, r2 & r3 are saved so we have the exception object, + // REGDISPLAY and ExInfo later + PROLOG_VPUSH {d8-d15} + +#define rsp_offset_r2 (8 * 8) + 4 +#define rsp_offset_r3 (8 * 8) + 8 + + mov r4, r0 // Save exception object + mov r5, r1 // Save handler funclet address + mov r6, r2 // Save REGDISPLAY* + + // Clear the DoNotTriggerGc state before calling out to our managed catch funclet, + // trashes r0-r2. 
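Editor's note: the ClearRetry/ClearSuccess ldrex/strex loop that follows is an atomic read-modify-write clearing one bit of m_ThreadStateFlags; in C++ terms it is a fetch_and. The flag value below is an assumption made only so the sketch is self-contained.

#include <atomic>
#include <cstdint>

constexpr uint32_t TSF_DoNotTriggerGc = 0x10;   // assumed value, for illustration only

// Equivalent of the ldrex / bic / strex retry loop (sketch).
void ClearDoNotTriggerGc(std::atomic<uint32_t>& threadStateFlags)
{
    threadStateFlags.fetch_and(~TSF_DoNotTriggerGc);
}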
+ // r0 = GetThread() + INLINE_GETTHREAD + +LOCAL_LABEL(ClearRetry_Catch): + ldrex r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + bics r1, #TSF_DoNotTriggerGc + strex r2, r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r2, LOCAL_LABEL(ClearSuccess_Catch) + b LOCAL_LABEL(ClearRetry_Catch) +LOCAL_LABEL(ClearSuccess_Catch): + + mov r0, r4 // Reload exception object + mov r3, r5 // Reload handler funclet address + mov r2, r6 // Reload REGDISPLAY pointer + + // + // set preserved regs to the values expected by the funclet + // + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR4] + ldr r4, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR5] + ldr r5, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR6] + ldr r6, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR7] + ldr r7, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR8] + ldr r8, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR9] + ldr r9, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR10] + ldr r10, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR11] + ldr r11, [r12] + + // + // load vfp preserved regs + // + add r12, r2, #OFFSETOF__REGDISPLAY__D + vldm r12!, {d8-d15} + + // r0 <- exception object + blx r3 // call handler funclet + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + mov r4, r0 // Save the result + + INLINE_GETTHREAD // r0 <- Thread* + // We must unhijack the thread at this point because the section of stack where the + // hijack is applied may go dead. If it does, then the next time we try to unhijack + // the thread, it will corrupt the stack. + INLINE_THREAD_UNHIJACK r0, r3, r12 // Thread in r0, trashes r3 and r1 + ldr r2, [sp, #rsp_offset_r2] // r2 <- REGDISPLAY* + ldr r3, [sp, #rsp_offset_r3] // r3 <- current ExInfo* + ldr r2, [r2, #OFFSETOF__REGDISPLAY__SP] // r2 <- resume SP value + +LOCAL_LABEL(PopExInfoLoop): + ldr r3, [r3, #OFFSETOF__ExInfo__m_pPrevExInfo] // r3 <- next ExInfo + cbz r3, LOCAL_LABEL(DonePopping) // if (pExInfo == null) { we're done } + cmp r3, r2 + blt LOCAL_LABEL(PopExInfoLoop) // if (pExInfo < resume SP} { keep going } +LOCAL_LABEL(DonePopping): + + str r3, [r0, #OFFSETOF__Thread__m_pExInfoStackHead] // store the new head on the Thread + + // reset RSP and jump to the continuation address + mov sp, r2 + bx r4 + +NESTED_END RhpCallCatchFunclet, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +// +// INPUT: R0: handler funclet address +// R1: REGDISPLAY* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler + + PROLOG_PUSH "{r1,r4-r11,lr}" // r1 is saved so we have the REGDISPLAY later + PROLOG_VPUSH {d8-d15} +#define rsp_offset_r1 8 * 8 + + // + // We want to suppress hijacking between invocations of subsequent finallys. We do + // this because we cannot tolerate a GC after one finally has run (and possibly + // side-effected the GC state of the method) and then been popped off the stack, + // leaving behind no trace of its effect. + // + // So we clear the state before and set it after invocation of the handler. 
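Editor's note: in outline, the suppression described above takes the following shape, sketched in C++ with deliberately simplified types (the flag value is an assumption; the funclet call and REGDISPLAY write-back are reduced to callbacks).

#include <atomic>
#include <cstdint>
#include <functional>

constexpr uint32_t TSF_DoNotTriggerGc = 0x10;   // assumed value, for illustration only

// Shape of RhpCallFinallyFunclet (sketch): GC triggering is re-enabled only for the
// duration of the finally body, and the funclet's view of the preserved registers is
// written back into the REGDISPLAY afterwards.
void CallFinallyFuncletSketch(std::atomic<uint32_t>& threadStateFlags,
                              const std::function<void()>& runFinally,
                              const std::function<void()>& writeBackPreservedRegs)
{
    threadStateFlags.fetch_and(~TSF_DoNotTriggerGc);   // allow GC while managed code runs
    runFinally();                                      // invoke the finally funclet
    writeBackPreservedRegs();                          // publish new r4-r11 / d8-d15
    threadStateFlags.fetch_or(TSF_DoNotTriggerGc);     // back to no-trigger for the dispatcher
}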
+ // + + mov r4, r0 // Save handler funclet address + mov r5, r1 // Save REGDISPLAY* + // + // clear the DoNotTriggerGc flag, trashes r0-r2 + // + INLINE_GETTHREAD // r0 <- Thread* + +LOCAL_LABEL(ClearRetry): + ldrex r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + bics r1, #TSF_DoNotTriggerGc + strex r2, r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r2, LOCAL_LABEL(ClearSuccess) + b LOCAL_LABEL(ClearRetry) +LOCAL_LABEL(ClearSuccess): + + mov r2, r4 // reload handler funclet address + mov r1, r5 // reload REGDISPLAY pointer + + // + // set preserved regs to the values expected by the funclet + // + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR4] + ldr r4, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR5] + ldr r5, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR6] + ldr r6, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR7] + ldr r7, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR8] + ldr r8, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR9] + ldr r9, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR10] + ldr r10, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR11] + ldr r11, [r12] + + // + // load vfp preserved regs + // + add r12, r1, #OFFSETOF__REGDISPLAY__D + vldm r12!, {d8-d15} + + blx r2 // handler funclet address + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + ldr r1, [sp, #rsp_offset_r1] // reload REGDISPLAY pointer + + // + // save new values of preserved regs into REGDISPLAY + // + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR4] + str r4, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR5] + str r5, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR6] + str r6, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR7] + str r7, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR8] + str r8, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR9] + str r9, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR10] + str r10, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR11] + str r11, [r12] + + // + // store vfp preserved regs + // + add r12, r1, #OFFSETOF__REGDISPLAY__D + vstm r12!, {d8-d15} + + // + // set the DoNotTriggerGc flag, trashes r0-r2 + // + INLINE_GETTHREAD // r0 <- Thread* +LOCAL_LABEL(SetRetry): + ldrex r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + orrs r1, #TSF_DoNotTriggerGc + strex r2, r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r2, LOCAL_LABEL(SetSuccess) + b LOCAL_LABEL(SetRetry) +LOCAL_LABEL(SetSuccess): + + EPILOG_VPOP {d8-d15} + EPILOG_POP "{r1,r4-r11,pc}" + +NESTED_END RhpCallFinallyFunclet, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +// +// INPUT: R0: exception object +// R1: filter funclet address +// R2: REGDISPLAY* +// +// OUTPUT: +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler + + PROLOG_PUSH "{r2,r4-r11,lr}" + PROLOG_VPUSH {d8-d15} + + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR11] + ldr r11, [r12] + + mov r12, r1 // r12 <- handler funclet address + // r0 still contains the exception object + ldr r1, [r2, #OFFSETOF__REGDISPLAY__SP] // r1 <- establisher frame + + // + // call the funclet + // r0 = exception object + // r1 = establisher frame + blx r12 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + // R0 contains the result of the filter execution + + EPILOG_VPOP {d8-d15} + EPILOG_POP "{r2,r4-r11,pc}" + +NESTED_END RhpCallFilterFunclet, 
_TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.asm b/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.asm new file mode 100644 index 0000000000000..41b7a66bda2c3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/ExceptionHandling.asm @@ -0,0 +1,555 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowHwEx +;; +;; INPUT: R0: exception code of fault +;; R1: faulting IP +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpThrowHwEx + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 7)&(~7)) + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + + PROLOG_NOP mov r2, r0 ;; save exception code into r2 + PROLOG_NOP mov r0, sp ;; get SP of fault site + + PROLOG_NOP mov lr, r1 ;; set IP of fault site + + ;; Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_NOP vpush {d8-d15} + PROLOG_NOP push {r0,lr} ;; push {sp, pc} of fault site + PROLOG_PUSH_MACHINE_FRAME ;; unwind code only + PROLOG_PUSH {r0,r4-r11,lr} + ;; } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + ; r0: SP of fault site + ; r1: IP of fault site + ; r2: exception code of fault + ; lr: IP of fault site (as a 'return address') + + mov r0, r2 ;; r0 <- exception code of fault + + ;; r2 = GetThread(), TRASHES r1 + INLINE_GETTHREAD r2, r1 + + add r1, sp, #rsp_offsetof_ExInfo ;; r1 <- ExInfo* + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + mov r3, #2 + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] ;; pExInfo->m_kind = ExKind.HardwareFault + + + ;; link the ExInfo into the thread's ExInfo chain + ldr r3, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add r2, sp, #rsp_offsetof_Context ;; r2 <- PAL_LIMITED_CONTEXT* + str r2, [r1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; r0: exception code + ;; r1: ExInfo* + bl RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + ;; no return + __debugbreak + + NESTED_END RhpThrowHwEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowEx +;; +;; INPUT: R0: exception object +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpThrowEx + + ;; Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_VPUSH {d8-d15} + PROLOG_PUSH {r0,lr} ;; Reserve space for SP and store LR + PROLOG_PUSH {r0,r4-r11,lr} + ;; } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + ;; Compute and save SP at callsite. 
+ add r1, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) + str r1, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__SP)] + + ;; r2 = GetThread(), TRASHES r1 + INLINE_GETTHREAD r2, r1 + + ;; There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + ;; address could have been hijacked when we were in that C# code and we must remove the hijack and + ;; reflect the correct return address in our exception context record. The other throw helpers don't + ;; need this because they cannot be tail-called from C#. + + ;; NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + ;; where the tail-calling thread had saved LR, which may not match where we have saved LR. + + ldr r1, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz r1, NotHijacked + + ldr r3, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + + ;; r0: exception object + ;; r1: hijacked return address + ;; r2: pThread + ;; r3: hijacked return address location + + add r12, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) ;; re-compute SP at callsite + cmp r3, r12 ;; if (m_ppvHijackedReturnAddressLocation < SP at callsite) + blo TailCallWasHijacked + + ;; normal case where a valid return address location is hijacked + str r1, [r3] + b ClearThreadState + +TailCallWasHijacked + + ;; Abnormal case where the return address location is now invalid because we ended up here via a tail + ;; call. In this case, our hijacked return address should be the correct caller of this method. + ;; + + ;; stick the previous return address in LR as well as in the right spots in our PAL_LIMITED_CONTEXT. + mov lr, r1 + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__LR)] + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)] + +ClearThreadState + + ;; clear the Thread's hijack state + mov r3, #0 + str r3, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str r3, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +NotHijacked + + add r1, sp, #rsp_offsetof_ExInfo ;; r1 <- ExInfo* + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] ;; pExInfo->m_kind = ExKind.Throw + + ;; link the ExInfo into the thread's ExInfo chain + ldr r3, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add r2, sp, #rsp_offsetof_Context ;; r2 <- PAL_LIMITED_CONTEXT* + str r2, [r1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; r0: exception object + ;; r1: ExInfo* + bl RhThrowEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + ;; no return + __debugbreak + + NESTED_END RhpThrowEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpRethrow() +;; +;; SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +;; +;; INPUT: +;; +;; OUTPUT: +;; 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpRethrow + + ;; Setup a PAL_LIMITED_CONTEXT on the stack { + PROLOG_VPUSH {d8-d15} + PROLOG_PUSH {r0,lr} ;; Reserve space for SP and store LR + PROLOG_PUSH {r0,r4-r11,lr} + ;; } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + + ;; Compute and save SP at callsite. + add r1, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) + str r1, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__SP)] + + ;; r2 = GetThread(), TRASHES r1 + INLINE_GETTHREAD r2, r1 + + add r1, sp, #rsp_offsetof_ExInfo ;; r1 <- ExInfo* + mov r3, #0 + str r3, [r1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + strb r3, [r1, #OFFSETOF__ExInfo__m_kind] ;; init to a deterministic value (ExKind.None) + mov r3, #1 + strb r3, [r1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov r3, #0xFFFFFFFF + str r3, [r1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + + ;; link the ExInfo into the thread's ExInfo chain + ldr r3, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] + mov r0, r3 ;; r0 <- current ExInfo + str r3, [r1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str r1, [r2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add r2, sp, #rsp_offsetof_Context ;; r2 <- PAL_LIMITED_CONTEXT* + str r2, [r1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; r0 contains the currently active ExInfo + ;; r1 contains the address of the new ExInfo + bl RhRethrow + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + ;; no return + __debugbreak + + NESTED_END RhpRethrow + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +;; ExInfo* pExInfo) +;; +;; INPUT: R0: exception object +;; R1: handler funclet address +;; R2: REGDISPLAY* +;; R3: ExInfo* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallCatchFunclet + + PROLOG_PUSH {r0,r2-r11,lr} ;; r0, r2 & r3 are saved so we have the exception object, + ;; REGDISPLAY and ExInfo later + PROLOG_VPUSH {d8-d15} + +#define rsp_offset_is_not_handling_thread_abort (8 * 8) + 0 +#define rsp_offset_r2 (8 * 8) + 4 +#define rsp_offset_r3 (8 * 8) + 8 + + ;; + ;; clear the DoNotTriggerGc flag, trashes r4-r6 + ;; + INLINE_GETTHREAD r5, r6 ;; r5 <- Thread*, r6 <- trashed + + ldr r4, [r5, #OFFSETOF__Thread__m_threadAbortException] + sub r4, r0 + str r4, [sp, #rsp_offset_is_not_handling_thread_abort] ;; Non-zero if the exception is not ThreadAbortException + +ClearRetry_Catch + ldrex r4, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] + bic r4, #TSF_DoNotTriggerGc + strex r6, r4, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r6, ClearSuccess_Catch + b ClearRetry_Catch +ClearSuccess_Catch + + ;; + ;; set preserved regs to the values expected by the funclet + ;; + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR4] + ldr r4, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR5] + ldr r5, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR6] + ldr r6, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR7] + ldr r7, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR8] + ldr r8, [r12] + ldr r12, [r2, 
#OFFSETOF__REGDISPLAY__pR9] + ldr r9, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR10] + ldr r10, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR11] + ldr r11, [r12] + +#if 0 // def _DEBUG ;; @TODO: temporarily removed because trashing the frame pointer breaks the debugger + ;; trash the values at the old homes to make sure nobody uses them + movw r3, #0xdeed + movt r3, #0xbaad + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR4] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR5] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR6] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR7] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR8] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR9] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR10] + str r3, [r12] + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR11] + str r3, [r12] +#endif // _DEBUG + + ;; + ;; load vfp preserved regs + ;; + add r12, r2, #OFFSETOF__REGDISPLAY__D + vldm r12!, {d8-d15} + + ;; + ;; call the funclet + ;; + ;; r0 still contains the exception object + blx r1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + ;; r0 contains resume IP + + ldr r2, [sp, #rsp_offset_r2] ;; r2 <- REGDISPLAY* + +;; @TODO: add debug-only validation code for ExInfo pop + + INLINE_GETTHREAD r1, r3 ;; r1 <- Thread*, r3 <- trashed + + ;; We must unhijack the thread at this point because the section of stack where the hijack is applied + ;; may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. + INLINE_THREAD_UNHIJACK r1, r3, r12 ;; Thread in r1, trashes r3 and r12 + + ldr r3, [sp, #rsp_offset_r3] ;; r3 <- current ExInfo* + ldr r2, [r2, #OFFSETOF__REGDISPLAY__SP] ;; r2 <- resume SP value + +PopExInfoLoop + ldr r3, [r3, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; r3 <- next ExInfo + cbz r3, DonePopping ;; if (pExInfo == null) { we're done } + cmp r3, r2 + blt PopExInfoLoop ;; if (pExInfo < resume SP} { keep going } + +DonePopping + str r3, [r1, #OFFSETOF__Thread__m_pExInfoStackHead] ;; store the new head on the Thread + + ldr r3, =RhpTrapThreads + ldr r3, [r3] + tst r3, #TrapThreadsFlags_AbortInProgress + beq NoAbort + + ldr r3, [sp, #rsp_offset_is_not_handling_thread_abort] + cmp r3, #0 + bne NoAbort + + ;; It was the ThreadAbortException, so rethrow it + ;; reset SP + mov r1, r0 ;; r1 <- continuation address as exception PC + mov r0, #STATUS_REDHAWK_THREAD_ABORT + mov sp, r2 + b RhpThrowHwEx + +NoAbort + ;; reset SP and jump to continuation address + mov sp, r2 + bx r0 + + NESTED_END RhpCallCatchFunclet + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: R0: handler funclet address +;; R1: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallFinallyFunclet + + PROLOG_PUSH {r1,r4-r11,lr} ;; r1 is saved so we have the REGDISPLAY later + PROLOG_VPUSH {d8-d15} +#define rsp_offset_r1 8 * 8 + + ;; + ;; We want to suppress hijacking between invocations of subsequent finallys. We do this because we + ;; cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the + ;; method) and then been popped off the stack, leaving behind no trace of its effect. + ;; + ;; So we clear the state before and set it after invocation of the handler. 
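Editor's note: the PopExInfoLoop in the catch-funclet epilog above unlinks every ExInfo that lives below the stack location being resumed to. A C++ sketch with simplified types:

#include <cstdint>

struct ExInfoNodeSketch
{
    ExInfoNodeSketch* m_pPrevExInfo;
    // remaining fields omitted
};

// Starting from the ExInfo passed to the funclet, skip every entry that sits below
// the resume SP; the caller stores the result back into Thread::m_pExInfoStackHead.
ExInfoNodeSketch* PopExInfosBelowSketch(ExInfoNodeSketch* pCurrentExInfo, uintptr_t resumeSP)
{
    ExInfoNodeSketch* p = pCurrentExInfo;
    do
    {
        p = p->m_pPrevExInfo;
    }
    while (p != nullptr && reinterpret_cast<uintptr_t>(p) < resumeSP);
    return p;
}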
+ ;; + + ;; + ;; clear the DoNotTriggerGc flag, trashes r1-r3 + ;; + INLINE_GETTHREAD r2, r3 ;; r2 <- Thread*, r3 <- trashed +ClearRetry + ldrex r1, [r2, #OFFSETOF__Thread__m_ThreadStateFlags] + bic r1, #TSF_DoNotTriggerGc + strex r3, r1, [r2, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r3, ClearSuccess + b ClearRetry +ClearSuccess + + ldr r1, [sp, #rsp_offset_r1] ;; reload REGDISPLAY pointer + + ;; + ;; set preserved regs to the values expected by the funclet + ;; + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR4] + ldr r4, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR5] + ldr r5, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR6] + ldr r6, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR7] + ldr r7, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR8] + ldr r8, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR9] + ldr r9, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR10] + ldr r10, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR11] + ldr r11, [r12] + +#if 0 // def _DEBUG ;; @TODO: temporarily removed because trashing the frame pointer breaks the debugger + ;; trash the values at the old homes to make sure nobody uses them + movw r3, #0xdeed + movt r3, #0xbaad + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR4] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR5] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR6] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR7] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR8] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR9] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR10] + str r3, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR11] + str r3, [r12] +#endif // _DEBUG + + ;; + ;; load vfp preserved regs + ;; + add r12, r1, #OFFSETOF__REGDISPLAY__D + vldm r12!, {d8-d15} + + ;; + ;; call the funclet + ;; + blx r0 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + ldr r1, [sp, #rsp_offset_r1] ;; reload REGDISPLAY pointer + + ;; + ;; save new values of preserved regs into REGDISPLAY + ;; + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR4] + str r4, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR5] + str r5, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR6] + str r6, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR7] + str r7, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR8] + str r8, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR9] + str r9, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR10] + str r10, [r12] + ldr r12, [r1, #OFFSETOF__REGDISPLAY__pR11] + str r11, [r12] + + ;; + ;; store vfp preserved regs + ;; + add r12, r1, #OFFSETOF__REGDISPLAY__D + vstm r12!, {d8-d15} + + ;; + ;; set the DoNotTriggerGc flag, trashes r1-r3 + ;; + INLINE_GETTHREAD r2, r3 ;; r2 <- Thread*, r3 <- trashed +SetRetry + ldrex r1, [r2, #OFFSETOF__Thread__m_ThreadStateFlags] + orr r1, #TSF_DoNotTriggerGc + strex r3, r1, [r2, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r3, SetSuccess + b SetRetry +SetSuccess + + EPILOG_VPOP {d8-d15} + EPILOG_POP {r1,r4-r11,pc} + + NESTED_END RhpCallFinallyFunclet + + INLINE_GETTHREAD_CONSTANT_POOL + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: R0: exception object +;; R1: filter funclet address +;; R2: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallFilterFunclet + + PROLOG_PUSH {r2,r4-r11,lr} + 
PROLOG_VPUSH {d8-d15} + + ldr r12, [r2, #OFFSETOF__REGDISPLAY__pR7] + ldr r7, [r12] + + ;; + ;; call the funclet + ;; + ;; r0 still contains the exception object + blx r1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + EPILOG_VPOP {d8-d15} + EPILOG_POP {r2,r4-r11,pc} + + NESTED_END RhpCallFilterFunclet + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/FloatingPoint.asm b/src/coreclr/src/nativeaot/Runtime/arm/FloatingPoint.asm new file mode 100644 index 0000000000000..9d872fecb6576 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/FloatingPoint.asm @@ -0,0 +1,38 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + IMPORT fmod + + NESTED_ENTRY RhpFltRemRev + + PROLOG_PUSH {r4,lr} ; Save return address (and r4 for stack alignment) + + ;; The CRT only exports the double form of fmod, so we need to convert our input registers (s0, s1) to + ;; doubles (d0, d1). Unfortunately these registers overlap (d0 == s0/s1) so we need to move our inputs + ;; elsewhere first. In this case we can move them into s4/s5, which are also volatile and don't need + ;; to be preserved. + vmov.f32 s4, s0 + vmov.f32 s5, s1 + + ;; Convert s4 and s5 into d0 and d1. + vcvt.f64.f32 d0, s4 + vcvt.f64.f32 d1, s5 + + ;; Call the CRT's fmod to calculate the remainder into d0. + ldr r12, =fmod + blx r12 + + ;; Convert double result back to single. As far as I can see it's legal to do this directly even + ;; though d0 overlaps s0. + vcvt.f32.f64 s0, d0 + + EPILOG_POP {r4,lr} + EPILOG_RETURN + + NESTED_END RhpFltRemRev + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/GcProbe.asm b/src/coreclr/src/nativeaot/Runtime/arm/GcProbe.asm new file mode 100644 index 0000000000000..6418b0af16441 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/GcProbe.asm @@ -0,0 +1,620 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + ;; ARM64TODO: do same fix here as on Arm64? + SETALIAS g_fGcStressStarted, ?g_GCShadow@@3PAEA + + EXTERN $g_fGcStressStarted + +PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH +PROBE_SAVE_FLAGS_R0_IS_GCREF equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF + + + ;; Build a map of symbols representing offsets into a transition frame (see PInvokeTransitionFrame in + ;; rhbinder.h and keep these two in sync. + map 0 +m_ChainPointer field 4 ; r11 - OS frame chain used for quick stackwalks +m_RIP field 4 ; lr +m_FramePointer field 4 ; r7 +m_pThread field 4 +m_Flags field 4 ; bitmask of saved registers +m_PreservedRegs field (4 * 6) ; r4-r6,r8-r10 +m_CallersSP field 4 ; sp at routine entry +m_SavedR0 field 4 ; r0 +m_VolatileRegs field (4 * 4) ; r1-r3,lr +m_ReturnVfpRegs field (8 * 4) ; d0-d3, not really part of the struct +m_SavedAPSR field 4 ; saved condition codes +PROBE_FRAME_SIZE field 0 + + ;; Support for setting up a transition frame when performing a GC probe. In many respects this is very + ;; similar to the logic in PUSH_COOP_PINVOKE_FRAME in AsmMacros.h. In most cases setting up the + ;; transition frame comprises the entirety of the caller's prolog (and initial non-prolog code) and + ;; similarly for the epilog. Those cases can be dealt with using PROLOG_PROBE_FRAME and EPILOG_PROBE_FRAME + ;; defined below. 
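Editor's note: the map/field block above fixes the probe frame at 104 (0x68) bytes. The same layout written as a C++ struct with compile-time checks, purely as an illustrative cross-reference; every slot is modelled as uint32_t so the checks hold on any host, and the D registers become an array of eight words.

#include <cstddef>
#include <cstdint>

struct ProbeFrameSketch
{
    uint32_t m_ChainPointer;        // r11, OS frame chain used for quick stackwalks
    uint32_t m_RIP;                 // lr
    uint32_t m_FramePointer;        // r7
    uint32_t m_pThread;
    uint32_t m_Flags;               // bitmask of saved registers
    uint32_t m_PreservedRegs[6];    // r4-r6, r8-r10
    uint32_t m_CallersSP;           // sp at routine entry
    uint32_t m_SavedR0;             // r0
    uint32_t m_VolatileRegs[4];     // r1-r3, lr
    uint32_t m_ReturnVfpRegs[8];    // d0-d3
    uint32_t m_SavedAPSR;           // saved condition codes
};

static_assert(offsetof(ProbeFrameSketch, m_pThread)   == 12,  "m_pThread");
static_assert(offsetof(ProbeFrameSketch, m_Flags)     == 16,  "m_Flags");
static_assert(offsetof(ProbeFrameSketch, m_CallersSP) == 44,  "m_CallersSP");
static_assert(offsetof(ProbeFrameSketch, m_SavedAPSR) == 100, "m_SavedAPSR");
static_assert(sizeof(ProbeFrameSketch)                == 104, "PROBE_FRAME_SIZE");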
For the special cases where additional work has to be done in the prolog we also provide + ;; the lower level macros ALLOC_PROBE_FRAME, FREE_PROBE_FRAME and INIT_PROBE_FRAME that allow more control + ;; to be asserted. + ;; + ;; Note that we currently employ a significant simplification of frame setup: we always allocate a + ;; maximally-sized PInvokeTransitionFrame and save all of the registers. Depending on the caller this can + ;; lead to upto five additional register saves (r0-r3,r12) or 20 bytes of stack space. I have done no + ;; analysis to see whether any of the worst cases occur on performance sensitive paths and whether the + ;; additional saves will show any measurable degradation. + + ;; Perform the parts of setting up a probe frame that can occur during the prolog (and indeed this macro + ;; can only be called from within the prolog). + MACRO + ALLOC_PROBE_FRAME + + PROLOG_STACK_ALLOC 4 ; Space for saved APSR + PROLOG_VPUSH {d0-d3} ; Save floating point return registers + PROLOG_PUSH {r0-r3,lr} ; Save volatile registers + PROLOG_STACK_ALLOC 4 ; Space for caller's SP + PROLOG_PUSH {r4-r6,r8-r10} ; Save non-volatile registers + PROLOG_STACK_ALLOC 8 ; Space for flags and Thread* + PROLOG_PUSH {r7} ; Save caller's frame pointer + PROLOG_PUSH {r11,lr} ; Save frame-chain pointer and return address + MEND + + ;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all + ;; registers are restored (apart for sp and pc), even volatiles. + MACRO + FREE_PROBE_FRAME + + EPILOG_POP {r11,lr} ; Restore frame-chain pointer and return address + EPILOG_POP {r7} ; Restore caller's frame pointer + EPILOG_STACK_FREE 8 ; Discard flags and Thread* + EPILOG_POP {r4-r6,r8-r10} ; Restore non-volatile registers + EPILOG_STACK_FREE 4 ; Discard caller's SP + EPILOG_POP {r0-r3,lr} ; Restore volatile registers + EPILOG_VPOP {d0-d3} ; Restore floating point return registers + EPILOG_STACK_FREE 4 ; Space for saved APSR + MEND + + ;; Complete the setup of a probe frame allocated with ALLOC_PROBE_FRAME with the initialization that can + ;; occur only outside the prolog (includes linking the frame to the current Thread). This macro assumes SP + ;; is invariant outside of the prolog. + ;; + ;; $threadReg : register containing the Thread* (this will be preserved) + ;; $trashReg : register that can be trashed by this macro + ;; $BITMASK : value to initialize m_Flags field with (register or #constant) + ;; $frameSize : total size of the method's stack frame (including probe frame size) + MACRO + INIT_PROBE_FRAME $threadReg, $trashReg, $BITMASK, $frameSize + + str $threadReg, [sp, #m_pThread] ; Thread * + mov $trashReg, $BITMASK ; Bitmask of preserved registers + str $trashReg, [sp, #m_Flags] + add $trashReg, sp, #$frameSize + str $trashReg, [sp, #m_CallersSP] + MEND + + ;; Simple macro to use when setting up the probe frame can comprise the entire prolog. Call this macro + ;; first in the method (no further prolog instructions can be added after this). + ;; + ;; $threadReg : register containing the Thread* (this will be preserved). If defaulted (specify |) then + ;; the current thread will be calculated inline into r2 ($trashReg must not equal r2 in + ;; this case) + ;; $trashReg : register that can be trashed by this macro + ;; $BITMASK : value to initialize m_Flags field with (register or #constant) + MACRO + PROLOG_PROBE_FRAME $threadReg, $trashReg, $BITMASK + + ; Local string tracking the name of the register in which the Thread* is kept. 
Defaults to the value + ; of $threadReg. + LCLS __PPF_ThreadReg +__PPF_ThreadReg SETS "$threadReg" + + ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving + ; incoming register values into it. + ALLOC_PROBE_FRAME + + ; If the caller didn't provide a value for $threadReg then generate code to fetch the Thread* into r2. + ; Record that r2 holds the Thread* in our local variable. + IF "$threadReg" == "" + ASSERT "$trashReg" != "r2" +__PPF_ThreadReg SETS "r2" + INLINE_GETTHREAD $__PPF_ThreadReg, $trashReg + ENDIF + + ; Perform the rest of the PInvokeTransitionFrame initialization. + INIT_PROBE_FRAME $__PPF_ThreadReg, $trashReg, $BITMASK, PROBE_FRAME_SIZE + str sp, [$__PPF_ThreadReg, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + MEND + + ; Simple macro to use when PROLOG_PROBE_FRAME was used to set up and initialize the prolog and + ; PInvokeTransitionFrame. This will define the epilog including a return via the restored LR. + MACRO + EPILOG_PROBE_FRAME + + FREE_PROBE_FRAME + EPILOG_RETURN + MEND + + +;; +;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this +;; thread if it finds it at an IP that isn't managed code. +;; +;; Register state on entry: +;; r2: thread pointer +;; +;; Register state on exit: +;; r12: trashed +;; + MACRO + ClearHijackState + + mov r12, #0 + str r12, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str r12, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + MEND + + +;; +;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and +;; clears the hijack state. +;; +;; Register state on entry: +;; All registers correct for return to the original return address. +;; +;; Register state on exit: +;; r2: thread pointer +;; r3: trashed +;; r12: trashed +;; + MACRO + FixupHijackedCallstack + + ;; r2 <- GetThread(), TRASHES r3 + INLINE_GETTHREAD r2, r3 + + ;; + ;; Fix the stack by restoring the original return address + ;; + ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + + ClearHijackState + MEND + +;; +;; Set the Thread state and wait for a GC to complete. +;; +;; Register state on entry: +;; r4: thread pointer +;; +;; Register state on exit: +;; r4: thread pointer +;; All other registers trashed +;; + + EXTERN RhpWaitForGCNoAbort + + MACRO + WaitForGCCompletion + + ldr r2, [r4, #OFFSETOF__Thread__m_ThreadStateFlags] + tst r2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC + bne %ft0 + + ldr r2, [r4, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + bl RhpWaitForGCNoAbort +0 + MEND + + + MACRO + HijackTargetFakeProlog + + ;; This is a fake entrypoint for the method that 'tricks' the OS into calling our personality routine. + ;; The code here should never be executed, and the unwind info is bogus, but we don't mind since the + ;; stack is broken by the hijack anyway until after we fix it below. + PROLOG_PUSH {lr} + nop ; We also need a nop here to simulate the implied bl instruction. Without + ; this, an OS-applied -2 will back up into the method prolog and the unwind + ; will not be applied as desired. 
+ + MEND + + +;; +;; +;; +;; GC Probe Hijack targets +;; +;; + EXTERN RhpPInvokeExceptionGuard + + + NESTED_ENTRY RhpGcProbeHijackScalarWrapper, .text, RhpPInvokeExceptionGuard + + HijackTargetFakeProlog + + LABELED_RETURN_ADDRESS RhpGcProbeHijackScalar + + FixupHijackedCallstack + mov r12, #DEFAULT_FRAME_SAVE_FLAGS + b RhpGcProbe + NESTED_END RhpGcProbeHijackScalarWrapper + + NESTED_ENTRY RhpGcProbeHijackObjectWrapper, .text, RhpPInvokeExceptionGuard + + HijackTargetFakeProlog + + LABELED_RETURN_ADDRESS RhpGcProbeHijackObject + + FixupHijackedCallstack + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF) + b RhpGcProbe + NESTED_END RhpGcProbeHijackObjectWrapper + + NESTED_ENTRY RhpGcProbeHijackByrefWrapper, .text, RhpPInvokeExceptionGuard + + HijackTargetFakeProlog + + LABELED_RETURN_ADDRESS RhpGcProbeHijackByref + + FixupHijackedCallstack + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_BYREF) + b RhpGcProbe + NESTED_END RhpGcProbeHijackByrefWrapper + +#ifdef FEATURE_GC_STRESS +;; +;; +;; GC Stress Hijack targets +;; +;; + LEAF_ENTRY RhpGcStressHijackScalar + FixupHijackedCallstack + mov r12, #DEFAULT_FRAME_SAVE_FLAGS + b RhpGcStressProbe + LEAF_END RhpGcStressHijackScalar + + LEAF_ENTRY RhpGcStressHijackObject + FixupHijackedCallstack + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF) + b RhpGcStressProbe + LEAF_END RhpGcStressHijackObject + + LEAF_ENTRY RhpGcStressHijackByref + FixupHijackedCallstack + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_BYREF) + b RhpGcStressProbe + LEAF_END RhpGcStressHijackByref + + +;; +;; Worker for our GC stress probes. Do not call directly!! +;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. +;; This worker performs the GC Stress work and returns to the original return address. +;; +;; Register state on entry: +;; r0: hijacked function return value +;; r1: hijacked function return value +;; r2: thread pointer +;; r12: register bitmask +;; +;; Register state on exit: +;; Scratch registers, except for r0, have been trashed +;; All other registers restored as they were when the hijack was first reached. +;; + NESTED_ENTRY RhpGcStressProbe + PROLOG_PROBE_FRAME r2, r3, r12 + + bl $REDHAWKGCINTERFACE__STRESSGC + + EPILOG_PROBE_FRAME + NESTED_END RhpGcStressProbe +#endif ;; FEATURE_GC_STRESS + + EXTERN RhpThrowHwEx + + LEAF_ENTRY RhpGcProbe + ldr r3, =RhpTrapThreads + ldr r3, [r3] + tst r3, #TrapThreadsFlags_TrapThreads + bne %0 + bx lr +0 + b RhpGcProbeRare + LEAF_END RhpGcProbe + + NESTED_ENTRY RhpGcProbeRare + PROLOG_PROBE_FRAME r2, r3, r12 + + mov r4, r2 + WaitForGCCompletion + + ldr r2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tst r2, #PTFF_THREAD_ABORT + bne %1 + + EPILOG_PROBE_FRAME + +1 + FREE_PROBE_FRAME + EPILOG_NOP mov r0, #STATUS_REDHAWK_THREAD_ABORT + EPILOG_NOP mov r1, lr ;; return address as exception PC + EPILOG_BRANCH RhpThrowHwEx + + NESTED_END RhpGcProbe + + LEAF_ENTRY RhpGcPoll + ; @todo: I'm assuming it's not OK to trash any register here. If that's not true we can optimize the + ; push/pops out of this fast path. + push {r0} + ldr r0, =RhpTrapThreads + ldr r0, [r0] + tst r0, #TrapThreadsFlags_TrapThreads + bne %0 + pop {r0} + bx lr +0 + pop {r0} + b RhpGcPollRare + LEAF_END RhpGcPoll + + NESTED_ENTRY RhpGcPollRare + PROLOG_PROBE_FRAME |, r3, #PROBE_SAVE_FLAGS_EVERYTHING + + ; Unhijack this thread, if necessary. 
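Editor's note: the RhpGcPoll fast path above is a single flag test; the rare path only runs when the suspension machinery has raised the trap bit. As C++ (sketch; the flag value and the atomic wrapper are assumptions made to keep the example self-contained):

#include <atomic>
#include <cstdint>

constexpr uint32_t TrapThreadsFlags_TrapThreads = 0x2;  // assumed bit value, for illustration

extern std::atomic<uint32_t> g_TrapThreads;             // stands in for RhpTrapThreads
void GcPollRareSketch();                                // stands in for RhpGcPollRare

inline void GcPollSketch()
{
    if (g_TrapThreads.load(std::memory_order_relaxed) & TrapThreadsFlags_TrapThreads)
        GcPollRareSketch();
}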
+ INLINE_THREAD_UNHIJACK r2, r0, r1 ;; trashes r0, r1 + + mov r4, r2 + WaitForGCCompletion + + EPILOG_PROBE_FRAME + NESTED_END RhpGcPollRare + + LEAF_ENTRY RhpGcPollStress + ; + ; loop hijacking is used instead + ; + __debugbreak + + LEAF_END RhpGcPollStress + + +#ifdef FEATURE_GC_STRESS + NESTED_ENTRY RhpHijackForGcStress + PROLOG_PUSH {r0,r1} ; Save return value + PROLOG_VPUSH {d0-d3} ; Save VFP return value + + ;; + ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the + ;; IP after the call to this helper. + ;; + ;; This is very likely overkill since the calculation of the return address should only need SP and + ;; LR, but this is test code, so I'm not too worried about efficiency. + ;; + ;; Setup a PAL_LIMITED_CONTEXT on the stack { + ;; we'll need to reserve the size of the D registers in the context + ;; compute in the funny way below to include any padding between LR and D +DREG_SZ equ (SIZEOF__PAL_LIMITED_CONTEXT - (OFFSETOF__PAL_LIMITED_CONTEXT__LR + 4)) + + PROLOG_STACK_ALLOC DREG_SZ ;; Reserve space for d8-d15 + PROLOG_PUSH {r0,lr} ;; Reserve space for SP and store LR + PROLOG_PUSH {r0,r4-r11,lr} + ;; } end PAL_LIMITED_CONTEXT + + ;; Compute and save SP at callsite. + add r0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20 + 8) ;; +0x20 for vpush {d0-d3}, +8 for push {r0,r1} + str r0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP] + + mov r0, sp ; Address of PAL_LIMITED_CONTEXT + bl $THREAD__HIJACKFORGCSTRESS + + ;; epilog + EPILOG_POP {r0,r4-r11,lr} + EPILOG_STACK_FREE DREG_SZ + 8 ; Discard saved SP and LR and space for d8-d15 + EPILOG_VPOP {d0-d3} ; Restore VFP return value + EPILOG_POP {r0,r1} ; Restore return value + bx lr + NESTED_END RhpHijackForGcStress +#endif ;; FEATURE_GC_STRESS + + +;; +;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH +;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing +;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of +;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the +;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be +;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the +;; handler in the caller. +;; +;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to +;; complete. There are also variants for GC stress. +;; +;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to +;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack +;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. +;; +;; Register state on entry: +;; r0: pointer to this function (i.e., trash) +;; r1: reference to the exception object. +;; r2: handler address we want to jump to. +;; Non-volatile registers are all already correct for return to the caller. +;; LR still contains the return address. 
+;; +;; Register state on exit: +;; All registers except r0 and lr unchanged +;; + MACRO + RTU_EH_JUMP_HELPER $funcName, $hijackFuncName, $isStress, $stressFuncName + + LEAF_ENTRY $funcName + ; Currently the EH epilog won't pop the return address back into LR, + ; so we have to have a funny load from [sp-4] here to retrieve it. + + ldr r0, =$hijackFuncName + cmp r0, lr + beq RhpGCProbeForEHJump + + IF $isStress + ldr r0, =$stressFuncName + cmp r0, lr + beq RhpGCStressProbeForEHJump + ENDIF + + ;; We are not hijacked, so we can return to the handler. + ;; We return to keep the call/return prediction balanced. + mov lr, r2 ; Update the return address + bx lr + LEAF_END $funcName + MEND + +;; We need an instance of the helper for each possible hijack function. The binder has enough +;; information to determine which one we need to use for any function. + RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijackScalar, {false}, 0 + RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijackObject, {false}, 0 + RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijackByref, {false}, 0 +#ifdef FEATURE_GC_STRESS + RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijackScalar, {true}, RhpGcStressHijackScalar + RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijackObject, {true}, RhpGcStressHijackObject + RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijackByref, {true}, RhpGcStressHijackByref +#endif + +;; +;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs. +;; +;; Register state on entry: +;; r0: scratch +;; r1: reference to the exception object. +;; r2: handler address we want to jump to. +;; Non-volatile registers are all already correct for return to the caller. +;; The stack is as if we are just about to returned from the call +;; +;; Register state on exit: +;; r0: reference to the exception object +;; r2: thread pointer +;; + MACRO + EHJumpProbeProlog + + PROLOG_PUSH {r1,r2} ; save the handler address so we can jump to it later (save r1 just for alignment) + PROLOG_NOP mov r0, r1 ; move the ex object reference into r0 so we can report it + ALLOC_PROBE_FRAME + + ;; r2 <- GetThread(), TRASHES r1 + INLINE_GETTHREAD r2, r1 + + ;; Recover the original return address and update the frame + ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + str lr, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP] + + ;; ClearHijackState expects thread in r2 (trashes r12). + ClearHijackState + + ; TRASHES r1 + INIT_PROBE_FRAME r2, r1, #PROBE_SAVE_FLAGS_R0_IS_GCREF, (PROBE_FRAME_SIZE + 8) + str sp, [r2, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + MEND + +;; +;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the +;; final jump to the handler for EH jump probe funcs. +;; +;; Register state on entry: +;; r0: reference to the exception object +;; r1-r3: scratch +;; +;; Register state on exit: +;; sp: correct for return to the caller +;; r1: reference to the exception object +;; + MACRO + EHJumpProbeEpilog + + FREE_PROBE_FRAME ; This restores exception object back into r0 + EPILOG_NOP mov r1, r0 ; Move the Exception object back into r1 where the catch handler expects it + EPILOG_POP {r0,pc} ; Recover the handler address and jump to it + MEND + +;; +;; We are hijacked for a normal GC (not GC stress), so we need to unhijack and wait for the GC to complete. +;; +;; Register state on entry: +;; r0: reference to the exception object. +;; r2: thread +;; Non-volatile registers are all already correct for return to the caller. 
+;; The stack is as if we have tail called to this function (lr points to return address). +;; +;; Register state on exit: +;; r7: previous frame pointer +;; r0: reference to the exception object +;; + NESTED_ENTRY RhpGCProbeForEHJump + EHJumpProbeProlog + +#ifdef _DEBUG + ;; + ;; If we get here, then we have been hijacked for a real GC, and our SyncState must + ;; reflect that we've been requested to synchronize. + + ldr r1, =RhpTrapThreads + ldr r1, [r1] + tst r1, #TrapThreadsFlags_TrapThreads + bne %0 + + bl RhDebugBreak +0 +#endif ;; _DEBUG + + mov r4, r2 + WaitForGCCompletion + + EHJumpProbeEpilog + NESTED_END RhpGCProbeForEHJump + +#ifdef FEATURE_GC_STRESS +;; +;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. +;; +;; Register state on entry: +;; r1: reference to the exception object. +;; r2: thread +;; Non-volatile registers are all already correct for return to the caller. +;; The stack is as if we have tail called to this function (lr points to return address). +;; +;; Register state on exit: +;; r7: previous frame pointer +;; r0: reference to the exception object +;; + NESTED_ENTRY RhpGCStressProbeForEHJump + EHJumpProbeProlog + + bl $REDHAWKGCINTERFACE__STRESSGC + + EHJumpProbeEpilog + NESTED_END RhpGCStressProbeForEHJump + +;; +;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. +;; + LEAF_ENTRY RhpSuppressGcStress + + push {r0-r2} + INLINE_GETTHREAD r0, r1 + +Retry + ldrex r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + orr r1, #TSF_SuppressGcStress + strex r2, r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags] + cbz r2, Success + b Retry + +Success + pop {r0-r2} + bx lr + + LEAF_END RhpSuppressGcStress +#endif ;; FEATURE_GC_STRESS + + INLINE_GETTHREAD_CONSTANT_POOL + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/GetThread.asm b/src/coreclr/src/nativeaot/Runtime/arm/GetThread.asm new file mode 100644 index 0000000000000..b78319f8f15c2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/GetThread.asm @@ -0,0 +1,32 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpGetThread +;; +;; +;; INPUT: none +;; +;; OUTPUT: r0: Thread pointer +;; +;; MUST PRESERVE ARGUMENT REGISTERS +;; @todo check the actual requirements here, r0 is both return and argument register +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + LEAF_ENTRY RhpGetThread + + ;; r0 = GetThread(), TRASHES r12 + INLINE_GETTHREAD r0, r12 + bx lr + + LEAF_END +FASTCALL_ENDFUNC + + INLINE_GETTHREAD_CONSTANT_POOL + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/Interlocked.S b/src/coreclr/src/nativeaot/Runtime/arm/Interlocked.S new file mode 100644 index 0000000000000..c3aada2e9d6c5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/Interlocked.S @@ -0,0 +1,57 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
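+
+// For illustration only: the helper below is the hand-written LDREX/STREX form of a
+// sequentially consistent 32-bit compare-exchange that returns the value observed at
+// the destination. A rough C++ sketch of that contract (the builtin is GCC/Clang
+// shorthand used purely for illustration; nothing in this file depends on it):
+//
+//   int32_t LockCmpXchg32(int32_t* dest, int32_t value, int32_t comparand)
+//   {
+//       // store 'value' only if *dest equals 'comparand'; on failure the builtin
+//       // writes the observed value back into 'comparand', so returning it hands
+//       // the caller the value seen at *dest in both cases.
+//       __atomic_compare_exchange_n(dest, &comparand, value, false,
+//                                   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+//       return comparand;
+//   }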
+ +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg32AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// r0 = destination address +// r1 = value +// r2 = comparand +LEAF_ENTRY RhpLockCmpXchg32, _TEXT + dmb +ALTERNATE_ENTRY RhpLockCmpXchg32AVLocation +LOCAL_LABEL(CmpXchg32Retry): + ldrex r3, [r0] + cmp r2, r3 + bne LOCAL_LABEL(CmpXchg32Exit) + strex r12, r1, [r0] + cmp r12, #0 + bne LOCAL_LABEL(CmpXchg32Retry) +LOCAL_LABEL(CmpXchg32Exit): + mov r0, r3 + dmb + bx lr +LEAF_END RhpLockCmpXchg32, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg64AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// r0 = destination address +// {r2,r3} = value +// sp[0+8] = comparand +LEAF_ENTRY RhpLockCmpXchg64, _TEXT +ALTERNATE_ENTRY RhpLockCmpXchg64AVLocation + ldr r12, [r0] // dummy read for null check + PROLOG_PUSH "{r4-r6,lr}" + dmb + ldrd r4, r5, [sp,#0x10] +LOCAL_LABEL(CmpXchg64Retry): + ldrexd r6, r1, [r0] + cmp r6, r4 + bne LOCAL_LABEL(CmpXchg64Exit) + cmp r1, r5 + bne LOCAL_LABEL(CmpXchg64Exit) + strexd r12, r2, r3, [r0] + cmp r12, #0 + bne LOCAL_LABEL(CmpXchg64Retry) +LOCAL_LABEL(CmpXchg64Exit): + mov r0, r6 + dmb + EPILOG_POP "{r4-r6,pc}" +LEAF_END RhpLockCmpXchg64, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.S b/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.S new file mode 100644 index 0000000000000..d8012f088a6ec --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.S @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +#define POINTER_SIZE 4 + +// +// RhCommonStub +// +NESTED_ENTRY RhCommonStub, _TEXT, NoHandler + // Custom calling convention: + // red zone has pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + // Copy red zone value into r12 so that the PROLOG_PUSH doesn't destroy it + ldr r12, [sp, #-4] + PROLOG_PUSH "{r0-r4, lr}" + PROLOG_VPUSH {d0-d7} // Capture the floating point argument registers + + mov r4, r12 + + INLINE_GET_TLS_VAR tls_thunkData + + // r0 = base address of TLS data + // r4 = address of context cell in thunk's data + + ldr r12, [r4] + str r12, [r0] + + // Now load the target address and jump to it. 
+ ldr r12, [r4, #POINTER_SIZE] + EPILOG_VPOP {d0-d7} + EPILOG_POP "{r0-r4, lr}" + bx r12 + +NESTED_END RhCommonStub, _TEXT + +// +// IntPtr RhGetCommonStubAddress() +// +LEAF_ENTRY RhGetCommonStubAddress, _TEXT + ldr r0, =C_FUNC(RhCommonStub) + bx lr +LEAF_END RhGetCommonStubAddress, _TEXT + +// +// IntPtr RhGetCurrentThunkContext() +// +LEAF_ENTRY RhGetCurrentThunkContext, _TEXT + + PROLOG_PUSH "{r12, lr}" + + INLINE_GET_TLS_VAR tls_thunkData + + ldr r0, [r0] + EPILOG_POP "{r12, pc}" +LEAF_END RhGetCurrentThunkContext, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.asm b/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.asm new file mode 100644 index 0000000000000..3d652ecf924fd --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/InteropThunksHelpers.asm @@ -0,0 +1,83 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + +#include "kxarm.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +__tls_array equ 0x2C ;; offsetof(TEB, ThreadLocalStoragePointer) + +POINTER_SIZE equ 0x04 + +;; TLS variables + AREA |.tls$|, DATA +ThunkParamSlot % 0x4 + + TEXTAREA + + EXTERN _tls_index + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ;; + ;; RhCommonStub + ;; + NESTED_ENTRY RhCommonStub + ;; Custom calling convention: + ;; red zone has pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + ;; Copy red zone value into r12 so that the PROLOG_PUSH doesn't destroy it + PROLOG_NOP ldr r12, [sp, #-4] + PROLOG_PUSH {r0-r3} + + ;; Save context data into the ThunkParamSlot thread-local variable + ;; A pointer to the delegate and function pointer for open static delegate should have been saved in the thunk's context cell during thunk allocation + ldr r3, =_tls_index + ldr r2, [r3] + mrc p15, #0, r3, c13, c0, #2 + ldr r3, [r3, #__tls_array] + ldr r2, [r3, r2, lsl #2] ;; r2 <- our TLS base + + ;; r2 = base address of TLS data + ;; r12 = address of context cell in thunk's data + + ;; store thunk address in thread static + ldr r1, [r12] + ldr r3, =ThunkParamSlot + str r1, [r2, r3] ;; ThunkParamSlot <- context slot data + + ;; Now load the target address and jump to it. + ldr r12, [r12, #POINTER_SIZE] + EPILOG_POP {r0-r3} + bx r12 + NESTED_END RhCommonStub + + + ;; + ;; IntPtr RhGetCommonStubAddress() + ;; + LEAF_ENTRY RhGetCommonStubAddress + ldr r0, =RhCommonStub + bx lr + LEAF_END RhGetCommonStubAddress + + + ;; + ;; IntPtr RhGetCurrentThunkContext() + ;; + LEAF_ENTRY RhGetCurrentThunkContext + + ldr r3, =_tls_index + ldr r2, [r3] + mrc p15, #0, r3, c13, c0, #2 + ldr r3, [r3, #__tls_array] + ldr r2, [r3, r2, lsl #2] ;; r2 <- our TLS base + + ldr r3, =ThunkParamSlot + ldr r0, [r2, r3] ;; r0 <- ThunkParamSlot + + bx lr + LEAF_END RhGetCurrentThunkContext + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.S b/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.S new file mode 100644 index 0000000000000..53616c2269615 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.S @@ -0,0 +1,2 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
diff --git a/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.asm b/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.asm new file mode 100644 index 0000000000000..462f31f00294d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/MiscStubs.asm @@ -0,0 +1,243 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + EXTERN GetClasslibCCtorCheck + EXTERN memcpy + EXTERN memcpyGCRefs + EXTERN memcpyGCRefsWithWriteBarrier + EXTERN memcpyAnyWithWriteBarrier + + TEXTAREA + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; r0 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers and the condition codes may be trashed. +;; + LEAF_ENTRY RhpCheckCctor + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + ldr r12, [r0, #OFFSETOF__StaticClassConstructionContext__m_initialized] + cmp r12, #1 + bne RhpCheckCctor__SlowPath + bx lr +RhpCheckCctor__SlowPath + mov r1, r0 + b RhpCheckCctor2 ; tail-call the check cctor helper that actually has an implementation to call + ; the cctor + LEAF_END RhpCheckCctor + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; r0 : Value that must be preserved in this register across the cctor check. +;; r1 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than r0 may be trashed and the condition codes may also be trashed. +;; + LEAF_ENTRY RhpCheckCctor2 + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + ldr r12, [r1, #OFFSETOF__StaticClassConstructionContext__m_initialized] + cmp r12, #1 + bne RhpCheckCctor2__SlowPath + bx lr + + LEAF_END RhpCheckCctor2 + +;; +;; Slow path helper for RhpCheckCctor. +;; +;; Input: +;; r0 : Value that must be preserved in this register across the cctor check. +;; r1 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than r0 may be trashed and the condition codes may also be trashed. +;; + NESTED_ENTRY RhpCheckCctor2__SlowPath + + ;; Need to preserve r0, r1 and lr across helper call. r2 is also pushed to keep the stack 8 byte aligned. + PROLOG_PUSH {r0-r2,lr} + + ;; Call a C++ helper to retrieve the address of the classlib callback. 
The caller's return address is + ;; passed as the argument to the helper; it's an address in the module and is used by the helper to + ;; locate the classlib. + mov r0, lr + bl GetClasslibCCtorCheck + + ;; R0 now contains the address of the classlib method to call. The single argument is the context + ;; structure address currently in stashed on the stack. Clean up and tail call to the classlib + ;; callback so we're not on the stack should a GC occur (so we don't need to worry about transition + ;; frames). + mov r12, r0 + EPILOG_POP {r0-r2,lr} + ;; tail-call the class lib cctor check function. This function is required to return its first + ;; argument, so that r0 can be preserved. + EPILOG_BRANCH_REG r12 + + NESTED_END RhpCheckCctor__SlowPath2 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteNoGCRefs(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; + + LEAF_ENTRY RhpCopyMultibyteNoGCRefs + + ; r0 dest + ; r1 src + ; r2 count + + cmp r2, #0 ; check for a zero-length copy + beq NothingToCopy_NoGCRefs + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation + ldrb r3, [r0] + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation + ldrb r3, [r1] + + ; tail-call to plain-old-memcpy + b memcpy + +NothingToCopy_NoGCRefs + ; dest is already still in r0 + bx lr + + LEAF_END + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyte(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; + + LEAF_ENTRY RhpCopyMultibyte + + ; r0 dest + ; r1 src + ; r2 count + + cmp r2, #0 ; check for a zero-length copy + beq NothingToCopy_RhpCopyMultibyte + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation + ldrb r3, [r0] + ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation + ldrb r3, [r1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyGCRefs + +NothingToCopy_RhpCopyMultibyte + ; dest is already still in r0 + bx lr + + LEAF_END + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. 
+;; Runs a card table update via RhpBulkWriteBarrier after the copy +;; + + LEAF_ENTRY RhpCopyMultibyteWithWriteBarrier + + ; r0 dest + ; r1 src + ; r2 count + + cmp r2, #0 ; check for a zero-length copy + beq NothingToCopy_RhpCopyMultibyteWithWriteBarrier + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation + ldrb r3, [r0] + ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation + ldrb r3, [r1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyGCRefsWithWriteBarrier + +NothingToCopy_RhpCopyMultibyteWithWriteBarrier + ; dest is already still in r0 + bx lr + + LEAF_END + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyAnyWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy if it contained GC pointers +;; + + LEAF_ENTRY RhpCopyAnyWithWriteBarrier + + ; r0 dest + ; r1 src + ; r2 count + + cmp r2, #0 ; check for a zero-length copy + beq NothingToCopy_RhpCopyAnyWithWriteBarrier + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation + ldrb r3, [r0] + ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation + ldrb r3, [r1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyAnyWithWriteBarrier + +NothingToCopy_RhpCopyAnyWithWriteBarrier + ; dest is already still in r0 + bx lr + + LEAF_END + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.S b/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.S new file mode 100644 index 0000000000000..6be1544876c7b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.S @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include + +.syntax unified +.thumb + +// +// RhpPInvoke +// +// IN: R0: address of pinvoke frame +// +// This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite. +// The codegenerator must treat the callsite of this helper as GC triggering and generate the GC info for it. +// Also, the codegenerator must ensure that there are no live GC references in callee saved registers. 
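+//
+// For illustration, the body below is roughly the following C++-like sketch (the
+// field names simply mirror the offsets it stores through; this is not a second
+// implementation):
+//
+//   frame->m_RIP           = return address;
+//   frame->m_FramePointer  = r11;
+//   frame->m_PreservedRegs = sp;             // first preserved-register slot
+//   frame->m_Flags         = PTFF_SAVE_SP;
+//   frame->m_pThread       = GetThread();
+//   frame->m_pThread->m_pTransitionFrame = frame;
+//   if (RhpTrapThreads != TrapThreadsFlags_None)
+//       RhpWaitForSuspend2();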
+// + +NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler + str lr, [r0, #OFFSETOF__PInvokeTransitionFrame__m_RIP] + str r11, [r0, #OFFSETOF__PInvokeTransitionFrame__m_FramePointer] + str sp, [r0, #OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs] + mov r3, #PTFF_SAVE_SP + str r3, [r0, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + + PROLOG_PUSH "{r5,lr}" + + mov r5, r0 + // get TLS global variable address + // r0 = GetThread() + INLINE_GETTHREAD + str r0, [r5, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + str r5, [r0, #OFFSETOF__Thread__m_pTransitionFrame] + + ldr r3, =C_FUNC(RhpTrapThreads) + ldr r3, [r3] + cbnz r3, LOCAL_LABEL(InvokeRareTrapThread) // TrapThreadsFlags_None = 0 + + EPILOG_POP "{r5,pc}" + +LOCAL_LABEL(InvokeRareTrapThread): + EPILOG_POP "{r5,lr}" + b C_FUNC(RhpWaitForSuspend2) +NESTED_END RhpPInvoke, _TEXT + + +// +// RhpPInvokeReturn +// +// IN: R0: address of pinvoke frame +// +LEAF_ENTRY RhpPInvokeReturn, _TEXT + ldr r3, [r0, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + + mov r2, #0 + str r2, [r3, #OFFSETOF__Thread__m_pTransitionFrame] + + ldr r3, =C_FUNC(RhpTrapThreads) + ldr r3, [r3] + cbnz r3, LOCAL_LABEL(ReturnRareTrapThread) // TrapThreadsFlags_None = 0 + + bx lr +LOCAL_LABEL(ReturnRareTrapThread): + // passing transition frame pointer in r0 + b C_FUNC(RhpWaitForGC2) +LEAF_END RhpPInvokeReturn, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.asm b/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.asm new file mode 100644 index 0000000000000..7ead71ac6ccc1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/PInvoke.asm @@ -0,0 +1,238 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + IMPORT RhpReversePInvokeBadTransition + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn +;; +;; +;; INPUT: none +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForSuspend + + PROLOG_PUSH {r0-r4,lr} ; Need to save argument registers r0-r3 and lr, r4 is just for alignment + PROLOG_VPUSH {d0-d7} ; Save float argument registers as well since they're volatile + + bl RhpWaitForSuspend2 + + EPILOG_VPOP {d0-d7} + EPILOG_POP {r0-r4,pc} + + NESTED_END RhpWaitForSuspend + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGCNoAbort +;; +;; +;; INPUT: r2: transition frame +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForGCNoAbort + + PROLOG_PUSH {r0-r6,lr} ; Even number of registers to maintain 8-byte stack alignment + PROLOG_VPUSH {d0-d3} ; Save float return value registers as well + + ldr r5, [r2, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + + ldr r0, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] + tst r0, #TSF_DoNotTriggerGc + bne Done + + mov r0, r2 ; passing transition frame in r0 + bl RhpWaitForGC2 + +Done + EPILOG_VPOP {d0-d3} + EPILOG_POP {r0-r6,pc} + + NESTED_END RhpWaitForGCNoAbort + + EXTERN RhpThrowHwEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGC +;; +;; +;; INPUT: r2: transition frame +;; +;; OUTPUT: +;; 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForGC + PROLOG_PUSH {r0,lr} + + ldr r0, =RhpTrapThreads + ldr r0, [r0] + tst r0, #TrapThreadsFlags_TrapThreads + beq NoWait + bl RhpWaitForGCNoAbort +NoWait + tst r0, #TrapThreadsFlags_AbortInProgress + beq NoAbort + ldr r0, [r2, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tst r0, #PTFF_THREAD_ABORT + beq NoAbort + EPILOG_POP {r0,r1} ; hijack target address as exception PC + EPILOG_NOP mov r0, #STATUS_REDHAWK_THREAD_ABORT + EPILOG_BRANCH RhpThrowHwEx +NoAbort + EPILOG_POP {r0,pc} + NESTED_END RhpWaitForGC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvoke +;; +;; IN: r4: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 4: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpReversePInvoke + + PROLOG_PUSH {r5-r7,lr} ; Even number of registers to maintain 8-byte stack alignment + + INLINE_GETTHREAD r5, r6 ; r5 = Thread, r6 trashed + str r5, [r4, #4] ; save Thread pointer for RhpReversePInvokeReturn + + ; r4 = prev save slot + ; r5 = thread + ; r6 = scratch + + ldr r6, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] + tst r6, #TSF_Attached + beq AttachThread + +ThreadAttached + ;; + ;; Check for the correct mode. This is accessible via various odd things that we cannot completely + ;; prevent such as : + ;; 1) Registering a reverse pinvoke entrypoint as a vectored exception handler + ;; 2) Performing a managed delegate invoke on a reverse pinvoke delegate. + ;; + ldr r6, [r5, #OFFSETOF__Thread__m_pTransitionFrame] + cbz r6, CheckBadTransition + + ;; Save previous TransitionFrame prior to making the mode transition so that it is always valid + ;; whenever we might attempt to hijack this thread. + str r6, [r4] + + mov r6, #0 + str r6, [r5, #OFFSETOF__Thread__m_pTransitionFrame] + dmb + + ldr r6, =RhpTrapThreads + ldr r6, [r6] + tst r6, #TrapThreadsFlags_TrapThreads + bne TrapThread + +AllDone + EPILOG_POP {r5-r7,lr} + EPILOG_RETURN + + +CheckBadTransition + ;; Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. This allows us to have + ;; [UnmanagedCallersOnly] methods that are called via the "restricted GC callouts" as well as from native, + ;; which is necessary because the methods are CCW vtable methods on interfaces passed to native. 
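+        ;;
+        ;; In C++-like pseudocode this block is roughly (illustration only):
+        ;;
+        ;;   if ((thread->m_ThreadStateFlags & TSF_DoNotTriggerGc) == 0)
+        ;;       goto BadTransition;
+        ;;   revFrame[0] = nullptr;   // leave the 'previous transition frame' save slot empty
+        ;;   goto AllDone;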
+ ldr r7, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] + tst r7, #TSF_DoNotTriggerGc + beq BadTransition + + ;; zero-out our 'previous transition frame' save slot + mov r7, #0 + str r7, [r4] + + ;; nothing more to do + b AllDone + +TrapThread + ;; put the previous frame back (sets us back to preemptive mode) + ldr r6, [r4] + str r6, [r5, #OFFSETOF__Thread__m_pTransitionFrame] + dmb + +AttachThread + ; passing address of reverse pinvoke frame in r4 + EPILOG_POP {r5-r7,lr} + EPILOG_BRANCH RhpReversePInvokeAttachOrTrapThread + +BadTransition + EPILOG_POP {r5-r7,lr} + EPILOG_NOP mov r0, lr ; arg <- return address + EPILOG_BRANCH RhpReversePInvokeBadTransition + + NESTED_END RhpReversePInvoke + + INLINE_GETTHREAD_CONSTANT_POOL + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke +;; +;; +;; INPUT: r4: address of reverse pinvoke frame +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread + + PROLOG_PUSH {r0-r4,lr} ; Need to save argument registers r0-r3 and lr, r4 is just for alignment + PROLOG_VPUSH {d0-d7} ; Save float argument registers as well since they're volatile + + mov r0, r4 ; passing reverse pinvoke frame pointer in r0 + bl RhpReversePInvokeAttachOrTrapThread2 + + EPILOG_VPOP {d0-d7} + EPILOG_POP {r0-r4,pc} + + NESTED_END RhpReversePInvokeTrapThread + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeReturn +;; +;; IN: r3: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 4: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + LEAF_ENTRY RhpReversePInvokeReturn + + ldr r2, [r3, #4] ; get Thread pointer + ldr r3, [r3, #0] ; get previous M->U transition frame + + str r3, [r2, #OFFSETOF__Thread__m_pTransitionFrame] + dmb + + ldr r3, =RhpTrapThreads + ldr r3, [r3] + tst r3, #TrapThreadsFlags_TrapThreads + bne RareTrapThread + + bx lr + +RareTrapThread + b RhpWaitForSuspend + + LEAF_END RhpReversePInvokeReturn + + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.S b/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.S new file mode 100644 index 0000000000000..6b7cf11a4a58a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.S @@ -0,0 +1,145 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +// TODO: Implement Arm support +#ifdef _DEBUG +.rodata +AssertMsg: .asciz "__FILE__:%s: %s is not implemented\n" +FileName: .asciz "StubDispatch.S" +RhpVTableOffsetDispatchName: .asciz "RhpVTableOffsetDispatch" +.text +.macro GEN_ASSERT_FUNC func + GEN_ASSERT AssertMsg, FileName, \func +.endm +#endif + + +// Macro that generates a stub consuming a cache with the given number of entries. +.macro DEFINE_INTERFACE_DISPATCH_STUB entries + +LEAF_ENTRY RhpInterfaceDispatch\entries, _TEXT + // r12 currently contains the indirection cell address. But we need more scratch registers and + // we may A/V on a null this. 
Both of these suggest we need a real prolog and epilog. + PROLOG_PUSH {r1-r2} + + // r12 currently holds the indirection cell address. We need to get the cache structure instead. + ldr r2, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the EEType from the object instance in r0. + ldr r1, [r0] + + CurrentOffset = OFFSETOF__InterfaceDispatchCache__m_rgEntries + // For each entry in the cache, see if its EEType type matches the EEType in r1. + // If so, call the second cache entry. If not, skip the InterfaceDispatchCacheEntry. + // R1 : Instance EEType* + // R2: Cache data structure + // R12 : Trashed. On succesful check, set to the target address to jump to. + .rept \entries + ldr r12, [r2, #CurrentOffset] + cmp r1, r12 + bne 0f + ldr r12, [r2, #(CurrentOffset + 4)] + b LOCAL_LABEL(99_\entries) + 0: + CurrentOffset = CurrentOffset + 8 + .endr + + // Point r12 to the indirection cell using the back pointer in the cache block + ldr r12, [r2, #OFFSETOF__InterfaceDispatchCache__m_pCell] + + EPILOG_POP {r1-r2} + b C_FUNC(RhpInterfaceDispatchSlow) + + // Common epilog for cache hits. Have to out of line it here due to limitation on the number of + // epilogs imposed by the unwind code macros. +LOCAL_LABEL(99_\entries): + // R2 contains address of the cache block. We store it in the red zone in case the target we jump + // to needs it. + // R12 contains the target address to jump to + EPILOG_POP {r1} + // The red zone is only 8 bytes long, so we have to store r2 into it between the pops. + str r2, [sp, #-4] + EPILOG_POP {r2} + EPILOG_BRANCH_REG r12 + +LEAF_END RhpInterfaceDispatch\entries, _TEXT + +.endm // DEFINE_INTERFACE_DISPATCH_STUB + +// Define all the stub routines we currently need. +// +// The mrt100dbi requires these be exported to identify mrt100 code that dispatches back into managed. +// If you change or add any new dispatch stubs, please also change slr.def and dbi\process.cpp CordbProcess::GetExportStepInfo +// +DEFINE_INTERFACE_DISPATCH_STUB 1 +DEFINE_INTERFACE_DISPATCH_STUB 2 +DEFINE_INTERFACE_DISPATCH_STUB 4 +DEFINE_INTERFACE_DISPATCH_STUB 8 +DEFINE_INTERFACE_DISPATCH_STUB 16 +DEFINE_INTERFACE_DISPATCH_STUB 32 +DEFINE_INTERFACE_DISPATCH_STUB 64 + +// Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // On input we have the indirection cell data structure in r12. But we need more scratch registers and + // we may A/V on a null this. Both of these suggest we need a real prolog and epilog. + PROLOG_PUSH {r1} + + // r12 currently holds the indirection cell address. We need to update it to point to the vtable + // offset instead. + ldr r12, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the EEType from the object instance in r0. + ldr r1, [r0] + + // add the vtable offset to the EEType pointer + add r12, r1, r12 + + // Load the target address of the vtable into r12 + ldr r12, [r12] + + EPILOG_POP {r1} + EPILOG_BRANCH_REG r12 +LEAF_END RhpVTableOffsetDispatch, _TEXT + +// Initial dispatch on an interface when we don't have a cache yet. +LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT + // The stub that jumped here pushed r12, which contains the interface dispatch cell + // we need to pop it here + pop { r12 } + + // Just tail call to the cache miss helper. 
+ b C_FUNC(RhpInterfaceDispatchSlow) +LEAF_END RhpInitialInterfaceDispatch, _TEXT + +// No as alternate entry due to missed thumb bit in this case +// See https://github.com/dotnet/runtime/issues/8608 +LEAF_ENTRY RhpInitialDynamicInterfaceDispatch, _TEXT + // Just tail call to the cache miss helper. + b C_FUNC(RhpInterfaceDispatchSlow) +LEAF_END RhpInitialDynamicInterfaceDispatch, _TEXT + +// Cache miss case, call the runtime to resolve the target and update the cache. +// Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // r12 has the interface dispatch cell address in it. + // The calling convention of the universal thunk is that the parameter + // for the universal thunk target is to be placed in sp-8 + // and the universal thunk target address is to be placed in sp-4 + str r12, [sp, #-8] + ldr r12, =C_FUNC(RhpCidResolve) + str r12, [sp, #-4] + + // jump to universal transition thunk + b C_FUNC(RhpUniversalTransition_DebugStepTailCall) +LEAF_END RhpInterfaceDispatchSlow, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.asm b/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.asm new file mode 100644 index 0000000000000..600d2776e19e9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/StubDispatch.asm @@ -0,0 +1,141 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + EXTERN RhpCidResolve + EXTERN RhpUniversalTransition_DebugStepTailCall + + ;; Macro that generates code to check a single cache entry. + MACRO + CHECK_CACHE_ENTRY $entry + ;; Check a single entry in the cache. + ;; R1 : Instance EEType* + ;; R2: Cache data structure + ;; R12 : Trashed. On succesful check, set to the target address to jump to. + + ldr r12, [r2, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 8))] + cmp r1, r12 + bne %ft0 + ldr r12, [r2, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 8) + 4)] + b %fa99 +0 + MEND + + +;; Macro that generates a stub consuming a cache with the given number of entries. + GBLS StubName + + MACRO + DEFINE_INTERFACE_DISPATCH_STUB $entries + +StubName SETS "RhpInterfaceDispatch$entries" + + NESTED_ENTRY $StubName + ;; On input we have the indirection cell data structure in r12. But we need more scratch registers and + ;; we may A/V on a null this. Both of these suggest we need a real prolog and epilog. + PROLOG_PUSH {r1-r2} + + ;; r12 currently holds the indirection cell address. We need to get the cache structure instead. + ldr r2, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in r0. + ldr r1, [r0] + + GBLA CurrentEntry +CurrentEntry SETA 0 + WHILE CurrentEntry < $entries + CHECK_CACHE_ENTRY CurrentEntry +CurrentEntry SETA CurrentEntry + 1 + WEND + + ;; Point r12 to the indirection cell using the back pointer in the cache block + ldr r12, [r2, #OFFSETOF__InterfaceDispatchCache__m_pCell] + + EPILOG_POP {r1-r2} + EPILOG_BRANCH RhpInterfaceDispatchSlow + + ;; Common epilog for cache hits. Have to out of line it here due to limitation on the number of + ;; epilogs imposed by the unwind code macros. +99 + ;; R2 contains address of the cache block. We store it in the red zone in case the target we jump + ;; to needs it. 
+ ;; R12 contains the target address to jump to + EPILOG_POP r1 + ;; The red zone is only 8 bytes long, so we have to store r2 into it between the pops. + EPILOG_NOP str r2, [sp, #-4] + EPILOG_POP r2 + EPILOG_BRANCH_REG r12 + + NESTED_END $StubName + + MEND + +;; Define all the stub routines we currently need. + DEFINE_INTERFACE_DISPATCH_STUB 1 + DEFINE_INTERFACE_DISPATCH_STUB 2 + DEFINE_INTERFACE_DISPATCH_STUB 4 + DEFINE_INTERFACE_DISPATCH_STUB 8 + DEFINE_INTERFACE_DISPATCH_STUB 16 + DEFINE_INTERFACE_DISPATCH_STUB 32 + DEFINE_INTERFACE_DISPATCH_STUB 64 + + +;; Initial dispatch on an interface when we don't have a cache yet. + LEAF_ENTRY RhpInitialInterfaceDispatch + + ;; The stub that jumped here pushed r12, which contains the interface dispatch cell + ;; we need to pop it here + pop { r12 } + + ;; Simply tail call the slow dispatch helper. + b RhpInterfaceDispatchSlow + + LEAF_END RhpInitialInterfaceDispatch + + LEAF_ENTRY RhpVTableOffsetDispatch + ;; On input we have the indirection cell data structure in r12. But we need more scratch registers and + ;; we may A/V on a null this. Both of these suggest we need a real prolog and epilog. + PROLOG_PUSH {r1} + + ;; r12 currently holds the indirection cell address. We need to update it to point to the vtable + ;; offset instead. + ldr r12, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in r0. + ldr r1, [r0] + + ;; add the vtable offset to the EEType pointer + add r12, r1, r12 + + ;; Load the target address of the vtable into r12 + ldr r12, [r12] + + EPILOG_POP {r1} + EPILOG_BRANCH_REG r12 + LEAF_END RhpVTableOffsetDispatch + +;; Cache miss case, call the runtime to resolve the target and update the cache. + LEAF_ENTRY RhpInterfaceDispatchSlow + ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + ;; r12 has the interface dispatch cell address in it. + ;; The calling convention of the universal thunk is that the parameter + ;; for the universal thunk target is to be placed in sp-8 + ;; and the universal thunk target address is to be placed in sp-4 + str r12, [sp, #-8] + ldr r12, =RhpCidResolve + str r12, [sp, #-4] + + ;; jump to universal transition thunk + b RhpUniversalTransition_DebugStepTailCall + LEAF_END RhpInterfaceDispatchSlow + + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm/ThunkPoolThunks.asm b/src/coreclr/src/nativeaot/Runtime/arm/ThunkPoolThunks.asm new file mode 100644 index 0000000000000..59086e36681ec --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/ThunkPoolThunks.asm @@ -0,0 +1,273 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
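+
+;; Layout sketch (illustration only): each thunk stub occupies THUNK_CODESIZE bytes in
+;; a 4K code page, and its two-pointer data cell occupies THUNK_DATASIZE bytes at the
+;; same index in the 4K data page that immediately follows the code page. In C++-style
+;; arithmetic, mirroring RhpGetThunkDataBlockAddress below:
+;;
+;;   uintptr_t dataPage = (thunkAddress & ~(uintptr_t)(PAGE_SIZE - 1)) + PAGE_SIZE;
+;;   uintptr_t dataCell = dataPage + thunkIndex * THUNK_DATASIZE;
+;;   // dataCell[0] = context, dataCell[1] = target; the common stub address lives in
+;;   // the last pointer-sized slot of the data page.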
+ +#include "kxarm.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; STUBS & DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +THUNK_CODESIZE equ 0x10 ;; 4-byte mov, 2-byte add, 4-byte str, 4-byte ldr, 2-byte branch +THUNK_DATASIZE equ 0x08 ;; 2 dwords + +THUNK_POOL_NUM_THUNKS_PER_PAGE equ 0xFA ;; 250 thunks per page + +PAGE_SIZE equ 0x1000 ;; 4K +POINTER_SIZE equ 0x04 + + MACRO + NAMED_READONLY_DATA_SECTION $name, $areaAlias + AREA $areaAlias,DATA,READONLY +RO$name % 4 + MEND + + MACRO + NAMED_READWRITE_DATA_SECTION $name, $areaAlias + AREA $areaAlias,DATA +RW$name % 4 + MEND + + MACRO + LOAD_DATA_ADDRESS $groupIndex, $index + ALIGN 0x10 ;; make sure we align to 16-byte boundary for CFG table + + ;; set r12 to begining of data page : r12 <- pc - (THUNK_CODESIZE * current thunk's index - sizeof(mov+add instructions)) + PAGE_SIZE + ;; fix offset of the data : r12 <- r12 + (THUNK_DATASIZE * current thunk's index) + mov.w r12, PAGE_SIZE + ($groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * $index) - (8 + $groupIndex * THUNK_CODESIZE * 10 + THUNK_CODESIZE * $index) + add.n r12, r12, pc + MEND + + MACRO + JUMP_TO_COMMON $groupIndex, $index + ;; start : r12 points to the current thunks first data cell in the data page + ;; put r12 into the red zone : r12 isn't changed + ;; set r12 to begining of data page : r12 <- r12 - (THUNK_DATASIZE * current thunk's index) + ;; fix offset to point to last DWROD in page : r12 <- r11 + PAGE_SIZE - POINTER_SIZE + ;; jump to the location pointed at by the last dword in the data page + str.w r12, [sp, #-4] + ldr.w r12, [r12, #(PAGE_SIZE - POINTER_SIZE - ($groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * $index))] + bx.n r12 + MEND + + MACRO + TenThunks $groupIndex + ;; Each thunk will load the address of its corresponding data (from the page that immediately follows) + ;; and call a common stub. The address of the common stub is setup by the caller (last dword + ;; in the thunks data section) depending on the 'kind' of thunks needed (interop, fat function pointers, etc...) + + ;; Each data block used by a thunk consists of two dword values: + ;; - Context: some value given to the thunk as context (passed in eax). Example for fat-fptrs: context = generic dictionary + ;; - Target : target code that the thunk eventually jumps to. 
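+        ;;   (The "eax" above is x86 terminology; in this ARM implementation the address
+        ;;   of the thunk's data cell is handed to the common stub through the red zone,
+        ;;   see JUMP_TO_COMMON above and RhCommonStub in InteropThunksHelpers.asm.)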
+ + LOAD_DATA_ADDRESS $groupIndex,0 + JUMP_TO_COMMON $groupIndex,0 + + LOAD_DATA_ADDRESS $groupIndex,1 + JUMP_TO_COMMON $groupIndex,1 + + LOAD_DATA_ADDRESS $groupIndex,2 + JUMP_TO_COMMON $groupIndex,2 + + LOAD_DATA_ADDRESS $groupIndex,3 + JUMP_TO_COMMON $groupIndex,3 + + LOAD_DATA_ADDRESS $groupIndex,4 + JUMP_TO_COMMON $groupIndex,4 + + LOAD_DATA_ADDRESS $groupIndex,5 + JUMP_TO_COMMON $groupIndex,5 + + LOAD_DATA_ADDRESS $groupIndex,6 + JUMP_TO_COMMON $groupIndex,6 + + LOAD_DATA_ADDRESS $groupIndex,7 + JUMP_TO_COMMON $groupIndex,7 + + LOAD_DATA_ADDRESS $groupIndex,8 + JUMP_TO_COMMON $groupIndex,8 + + LOAD_DATA_ADDRESS $groupIndex,9 + JUMP_TO_COMMON $groupIndex,9 + MEND + + MACRO + THUNKS_PAGE_BLOCK + + TenThunks 0 + TenThunks 1 + TenThunks 2 + TenThunks 3 + TenThunks 4 + TenThunks 5 + TenThunks 6 + TenThunks 7 + TenThunks 8 + TenThunks 9 + TenThunks 10 + TenThunks 11 + TenThunks 12 + TenThunks 13 + TenThunks 14 + TenThunks 15 + TenThunks 16 + TenThunks 17 + TenThunks 18 + TenThunks 19 + TenThunks 20 + TenThunks 21 + TenThunks 22 + TenThunks 23 + TenThunks 24 + MEND + + ;; + ;; The first thunks section should be 64K aligned because it can get + ;; mapped multiple times in memory, and mapping works on allocation + ;; granularity boundaries (we don't want to map more than what we need) + ;; + ;; The easiest way to do so is by having the thunks section at the + ;; first 64K aligned virtual address in the binary. We provide a section + ;; layout file to the linker to tell it how to layout the thunks sections + ;; that we care about. (ndp\rh\src\runtime\DLLs\app\mrt100_app_sectionlayout.txt) + ;; + ;; The PE spec says images cannot have gaps between sections (other + ;; than what is required by the section alignment value in the header), + ;; therefore we need a couple of padding data sections (otherwise the + ;; OS will not load the image). 
+ ;; + + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment0, "|.pad0|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment1, "|.pad1|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment2, "|.pad2|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment3, "|.pad3|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment4, "|.pad4|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment5, "|.pad5|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment6, "|.pad6|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment7, "|.pad7|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment8, "|.pad8|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment9, "|.pad9|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment10, "|.pad10|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment11, "|.pad11|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment12, "|.pad12|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment13, "|.pad13|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment14, "|.pad14|" + + ;; + ;; Thunk Stubs + ;; NOTE: Keep number of blocks in sync with macro/constant named 'NUM_THUNK_BLOCKS' in: + ;; - ndp\FxCore\src\System.Private.CoreLib\System\Runtime\InteropServices\ThunkPool.cs + ;; - ndp\rh\src\tools\rhbind\zapimage.h + ;; + LEAF_ENTRY ThunkPool, "|.tks0|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool + + NAMED_READWRITE_DATA_SECTION ThunkData0, "|.tkd0|" + + LEAF_ENTRY ThunkPool1, "|.tks1|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool1 + + NAMED_READWRITE_DATA_SECTION ThunkData1, "|.tkd1|" + + LEAF_ENTRY ThunkPool2, "|.tks2|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool2 + + NAMED_READWRITE_DATA_SECTION ThunkData2, "|.tkd2|" + + LEAF_ENTRY ThunkPool3, "|.tks3|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool3 + + NAMED_READWRITE_DATA_SECTION ThunkData3, "|.tkd3|" + + LEAF_ENTRY ThunkPool4, "|.tks4|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool4 + + NAMED_READWRITE_DATA_SECTION ThunkData4, "|.tkd4|" + + LEAF_ENTRY ThunkPool5, "|.tks5|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool5 + + NAMED_READWRITE_DATA_SECTION ThunkData5, "|.tkd5|" + + LEAF_ENTRY ThunkPool6, "|.tks6|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool6 + + NAMED_READWRITE_DATA_SECTION ThunkData6, "|.tkd6|" + + LEAF_ENTRY ThunkPool7, "|.tks7|" + THUNKS_PAGE_BLOCK + LEAF_END ThunkPool7 + + NAMED_READWRITE_DATA_SECTION ThunkData7, "|.tkd7|" + + + ;; + ;; IntPtr RhpGetThunksBase() + ;; + LEAF_ENTRY RhpGetThunksBase + ;; Return the address of the first thunk pool to the caller (this is really the base address) + ldr r0, =ThunkPool + sub r0, r0, #1 + bx lr + LEAF_END RhpGetThunksBase + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; General Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ;; + ;; int RhpGetNumThunksPerBlock() + ;; + LEAF_ENTRY RhpGetNumThunksPerBlock + mov r0, THUNK_POOL_NUM_THUNKS_PER_PAGE + bx lr + LEAF_END RhpGetNumThunksPerBlock + + ;; + ;; int RhpGetThunkSize() + ;; + LEAF_ENTRY RhpGetThunkSize + mov r0, THUNK_CODESIZE + bx lr + LEAF_END RhpGetThunkSize + + ;; + ;; int RhpGetNumThunkBlocksPerMapping() + ;; + LEAF_ENTRY RhpGetNumThunkBlocksPerMapping + mov r0, 8 + bx lr + LEAF_END RhpGetNumThunkBlocksPerMapping + + ;; + ;; int RhpGetThunkBlockSize + ;; + LEAF_ENTRY RhpGetThunkBlockSize + mov r0, PAGE_SIZE * 2 + bx lr + LEAF_END RhpGetThunkBlockSize + + ;; + ;; IntPtr RhpGetThunkDataBlockAddress(IntPtr thunkStubAddress) + ;; + LEAF_ENTRY RhpGetThunkDataBlockAddress + mov r12, PAGE_SIZE - 1 + bic r0, r0, r12 + add r0, PAGE_SIZE + bx lr + LEAF_END RhpGetThunkDataBlockAddress + + ;; + ;; IntPtr RhpGetThunkStubsBlockAddress(IntPtr thunkDataAddress) + 
;; + LEAF_ENTRY RhpGetThunkStubsBlockAddress + mov r12, PAGE_SIZE - 1 + bic r0, r0, r12 + sub r0, PAGE_SIZE + bx lr + LEAF_END RhpGetThunkStubsBlockAddress + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.S b/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.S new file mode 100644 index 0000000000000..45c137f9f13d1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.S @@ -0,0 +1,157 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +#ifdef FEATURE_DYNAMIC_CODE + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#define COUNT_ARG_REGISTERS (4) +#define INTEGER_REGISTER_SIZE (4) +#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) + +// Largest return block is 4 doubles +#define RETURN_BLOCK_SIZE (32) + +#define COUNT_FLOAT_ARG_REGISTERS (8) +#define FLOAT_REGISTER_SIZE (8) +#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define PUSHED_LR_SIZE (4) +#define PUSHED_R11_SIZE (4) + +// +// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +// +// ARGUMENT_REGISTERS_SIZE +// RETURN_BLOCK_SIZE +// FLOAT_ARG_REGISTERS_SIZE +// PUSHED_LR +// PUSHED_R11 + + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_R11_SIZE + PUSHED_LR_SIZE + FLOAT_ARG_REGISTERS_SIZE) + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpUniversalTransition +// +// At input to this function, r0-3, d0-7 and the stack may contain any number of arguments. +// +// In addition, there are 2 extra arguments passed in the RED ZONE (8 byte negative space +// off of sp). +// sp-4 will contain the managed function that is to be called by this transition function +// sp-8 will contain the pointer sized extra argument to the managed function +// +// When invoking the callee: +// +// r0 shall contain a pointer to the TransitionBlock +// r1 shall contain the value that was in sp-8 at entry to this function +// +// Frame layout is: +// +// {StackPassedArgs} ChildSP+078 CallerSP+000 +// {IntArgRegs (r0-r3) (0x10 bytes)} ChildSP+068 CallerSP-010 +// {ReturnBlock (0x20 bytes)} ChildSP+048 CallerSP-030 +// -- The base address of the Return block is the TransitionBlock pointer, the floating point args are +// in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +// layout of all pieces of the frame that lie at or above the pushed floating point registers. +// {FpArgRegs (d0-d7) (0x40 bytes)} ChildSP+008 CallerSP-070 +// {PushedLR} ChildSP+004 CallerSP-074 +// {PushedR11} ChildSP+000 CallerSP-078 +// +// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +// must be updated as well. +// +// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +// FpArgRegs. +// +// NOTE: The stack walker guarantees that conservative GC reporting will be applied to +// everything between the base of the ReturnBlock and the top of the StackPassedArgs. 
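+//
+// For illustration, a callee handed r0 (the ReturnBlock / TransitionBlock base) can
+// locate the saved state roughly as follows (sketch only; the offsets restate the
+// frame layout above):
+//
+//   void**  intArgRegs = (void**)(r0 + RETURN_BLOCK_SIZE);           // saved r0-r3
+//   void**  stackArgs  = intArgRegs + COUNT_ARG_REGISTERS;           // caller's stack-passed args
+//   double* fpArgRegs  = (double*)(r0 - FLOAT_ARG_REGISTERS_SIZE);   // saved d0-d7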
+// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +.macro UNIVERSAL_TRANSITION FunctionName + +NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler + // Save argument registers (including floating point) and the return address. + // NOTE: While we do that, capture the two arguments in the red zone into r12 and r3. + ldr r12, [sp, #-4] // Capture first argument from red zone into r12 + PROLOG_PUSH "{r3}" // Push r3 + ldr r3, [sp, #-4] // Capture second argument from red zone into r3 + PROLOG_PUSH "{r0-r2}" // Push the rest of the registers + PROLOG_STACK_ALLOC RETURN_BLOCK_SIZE // Save space a buffer to be used to hold return buffer data. + PROLOG_VPUSH {d0-d7} // Capture the floating point argument registers + PROLOG_PUSH "{r11,lr}" // Save caller's frame chain pointer and PC + + // Setup the arguments to the transition thunk. + mov r1, r3 + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + + // Before calling out, trash all of the argument registers except the ones (r0, r1) that + // hold outgoing arguments. All of these registers have been saved to the transition + // frame, and the code at the call target is required to use only the transition frame + // copies when dispatching this call to the eventual callee. + + ldr r3, =C_FUNC(RhpFpTrashValues) + vldr d0, [r3, #(0 * 8)] + vldr d1, [r3, #(1 * 8)] + vldr d2, [r3, #(2 * 8)] + vldr d3, [r3, #(3 * 8)] + vldr d4, [r3, #(4 * 8)] + vldr d5, [r3, #(5 * 8)] + vldr d6, [r3, #(6 * 8)] + vldr d7, [r3, #(7 * 8)] + + ldr r3, =C_FUNC(RhpIntegerTrashValues) + ldr r2, [r3, #(2 * 4)] + ldr r3, [r3, #(3 * 4)] + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + // Make the ReturnFromUniversalTransition alternate entry 4 byte aligned + .balign 4 + add r0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function is a pointer to the return block + blx r12 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom\FunctionName + + // We cannot make the label public as that tricks DIA stackwalker into thinking + // it's the beginning of a method. For this reason we export an auxiliary variable + // holding the address instead. + + // Move the result (the target address) to r12 so it doesn't get overridden when we restore the + // argument registers. Additionally make sure the thumb2 bit is set. + orr r12, r0, #1 + + // Restore caller's frame chain pointer and PC. + EPILOG_POP "{r11,lr}" + + // Restore the argument registers. + EPILOG_VPOP {d0-d7} + EPILOG_STACK_FREE RETURN_BLOCK_SIZE // pop return block conservatively reported area + EPILOG_POP "{r0-r3}" + + // Tailcall to the target address. + EPILOG_BRANCH_REG r12 + +NESTED_END Rhp\FunctionName, _TEXT + +.endm + +// To enable proper step-in behavior in the debugger, we need to have two instances +// of the thunk. For the first one, the debugger steps into the call in the function, +// for the other, it steps over it. +UNIVERSAL_TRANSITION UniversalTransition +UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + +#endif // FEATURE_DYNAMIC_CODE diff --git a/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.asm b/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.asm new file mode 100644 index 0000000000000..db01d02eb0c2f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/UniversalTransition.asm @@ -0,0 +1,157 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "AsmMacros.h" + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + EXTERN RhpIntegerTrashValues + EXTERN RhpFpTrashValues +#endif ;; TRASH_SAVED_ARGUMENT_REGISTERS + +#define COUNT_ARG_REGISTERS (4) +#define INTEGER_REGISTER_SIZE (4) +#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) + +;; Largest return block is 4 doubles +#define RETURN_BLOCK_SIZE (32) + +#define COUNT_FLOAT_ARG_REGISTERS (8) +#define FLOAT_REGISTER_SIZE (8) +#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define PUSHED_LR_SIZE (4) +#define PUSHED_R11_SIZE (4) + +;; +;; From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +;; +;; ARGUMENT_REGISTERS_SIZE +;; RETURN_BLOCK_SIZE +;; FLOAT_ARG_REGISTERS_SIZE +;; PUSHED_LR +;; PUSHED_R11 +;; + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_R11_SIZE + PUSHED_LR_SIZE + FLOAT_ARG_REGISTERS_SIZE) + + TEXTAREA + +;; +;; RhpUniversalTransition +;; +;; At input to this function, r0-3, d0-7 and the stack may contain any number of arguments. +;; +;; In addition, there are 2 extra arguments passed in the RED ZONE (8 byte negative space +;; off of sp). +;; sp-4 will contain the managed function that is to be called by this transition function +;; sp-8 will contain the pointer sized extra argument to the managed function +;; +;; When invoking the callee: +;; +;; r0 shall contain a pointer to the TransitionBlock +;; r1 shall contain the value that was in sp-8 at entry to this function +;; +;; Frame layout is: +;; +;; {StackPassedArgs} ChildSP+078 CallerSP+000 +;; {IntArgRegs (r0-r3) (0x10 bytes)} ChildSP+068 CallerSP-010 +;; {ReturnBlock (0x20 bytes)} ChildSP+048 CallerSP-030 +;; -- The base address of the Return block is the TransitionBlock pointer, the floating point args are +;; in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +;; layout of all pieces of the frame that lie at or above the pushed floating point registers. +;; {FpArgRegs (d0-d7) (0x40 bytes)} ChildSP+008 CallerSP-070 +;; {PushedLR} ChildSP+004 CallerSP-074 +;; {PushedR11} ChildSP+000 CallerSP-078 +;; +;; NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +;; must be updated as well. +;; +;; NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +;; knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +;; FpArgRegs. +;; +;; NOTE: The stack walker guarantees that conservative GC reporting will be applied to +;; everything between the base of the ReturnBlock and the top of the StackPassedArgs. +;; + + MACRO + UNIVERSAL_TRANSITION $FunctionName + + NESTED_ENTRY Rhp$FunctionName + ;; Save argument registers (including floating point) and the return address. + ;; NOTE: While we do that, capture the two arguments in the red zone into r12 and r3. + PROLOG_NOP ldr r12, [sp, #-4] ; Capture first argument from red zone into r12 + PROLOG_PUSH {r3} ; Push r3 + PROLOG_NOP ldr r3, [sp, #-4] ; Capture second argument from red zone into r3 + PROLOG_PUSH {r0-r2} ; Push the rest of the registers + PROLOG_STACK_ALLOC RETURN_BLOCK_SIZE ; Save space a buffer to be used to hold return buffer data. + PROLOG_VPUSH {d0-d7} ; Capture the floating point argument registers + PROLOG_PUSH {r11,lr} ; Save caller's frame chain pointer and PC + + ;; Setup the arguments to the transition thunk. 
+ mov r1, r3 + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + + ;; Before calling out, trash all of the argument registers except the ones (r0, r1) that + ;; hold outgoing arguments. All of these registers have been saved to the transition + ;; frame, and the code at the call target is required to use only the transition frame + ;; copies when dispatching this call to the eventual callee. + + ldr r3, =RhpFpTrashValues + vldr d0, [r3, #(0 * 8)] + vldr d1, [r3, #(1 * 8)] + vldr d2, [r3, #(2 * 8)] + vldr d3, [r3, #(3 * 8)] + vldr d4, [r3, #(4 * 8)] + vldr d5, [r3, #(5 * 8)] + vldr d6, [r3, #(6 * 8)] + vldr d7, [r3, #(7 * 8)] + + ldr r3, =RhpIntegerTrashValues + ldr r2, [r3, #(2 * 4)] + ldr r3, [r3, #(3 * 4)] + +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + ;; Make the ReturnFromUniversalTransition alternate entry 4 byte aligned + ALIGN 4 + add r0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK ;; First parameter to target function is a pointer to the return block + blx r12 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom$FunctionName + + ; We cannot make the label public as that tricks DIA stackwalker into thinking + ; it's the beginning of a method. For this reason we export an auxiliary variable + ; holding the address instead. + + ;; Move the result (the target address) to r12 so it doesn't get overridden when we restore the + ;; argument registers. Additionally make sure the thumb2 bit is set. + orr r12, r0, #1 + + ;; Restore caller's frame chain pointer and PC. + EPILOG_POP {r11,lr} + + ;; Restore the argument registers. + EPILOG_VPOP {d0-d7} + EPILOG_STACK_FREE RETURN_BLOCK_SIZE ; pop return block conservatively reported area + EPILOG_POP {r0-r3} + + ;; Tailcall to the target address. + EPILOG_BRANCH_REG r12 + + NESTED_END Rhp$FunctionName + + MEND + + ; To enable proper step-in behavior in the debugger, we need to have two instances + ; of the thunk. For the first one, the debugger steps into the call in the function, + ; for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.S b/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.S new file mode 100644 index 0000000000000..e69839ed92ade --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.S @@ -0,0 +1,382 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.syntax unified +.thumb + +#include // generated by the build from AsmOffsets.cpp +#include + +#ifdef WRITE_BARRIER_CHECK + +.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG + + // If g_GCShadow is 0, don't perform the check. + ldr r12, =C_FUNC(g_GCShadow) + ldr r12, [r12] + cbz r12, LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) + + // Save DESTREG since we're about to modify it (and we need the original value both within the macro and + // once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of + // the prolog inside a method without a frame. But given that this is only debug code and generally we + // shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier + // variants to set up frames. 
The compiler knows exactly which registers are trashed in the simple write + // barrier case, so we don't have any more scratch registers to play with (and doing so would only make + // things harder if at a later stage we want to allow multiple barrier versions based on the input + // registers). + push \DESTREG + + // Transform DESTREG into the equivalent address in the shadow heap. + ldr r12, =C_FUNC(g_lowest_address) + ldr r12, [r12] + sub \DESTREG, r12 + cmp \DESTREG, #0 + blo LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) + ldr r12, =C_FUNC(g_GCShadow) + ldr r12, [r12] + add \DESTREG, r12 + ldr r12, =C_FUNC(g_GCShadowEnd) + ldr r12, [r12] + cmp \DESTREG, r12 + jhi LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG) + + // Update the shadow heap. + str \REFREG, [\DESTREG] + + // The following read must be strongly ordered wrt to the write we've just performed in order to + // prevent race conditions. + dmb + + // Now check that the real heap location still contains the value we just wrote into the shadow heap. + mov r12, \DESTREG + ldr \DESTREG, [sp] + str r12, [sp] + ldr r12, [\DESTREG] + cmp r12, \REFREG + bne LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG) + + // The original DESTREG value is now restored but the stack has a value (the shadow version of the + // location) pushed. Need to discard this push before we are done. + add sp, #4 + b LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG) + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Invalidate_\REFREG): + // Someone went and updated the real heap. We need to invalidate the shadow location since we can't + // guarantee whose shadow update won. + + // Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an + // additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg + // variant that doesn't implicitly specify the lock prefix. Note that INVALIDGCVALUE is a 32-bit + // immediate and therefore must be moved into a register before it can be written to the shadow + // location. + mov r12, \DESTREG + ldr \DESTREG, [sp] + str r12, [sp] + push \REFREG + movw \REFREG, #(INVALIDGCVALUE & 0xFFFF) + movt \REFREG, #(INVALIDGCVALUE >> 16) + str \REFREG, [\DESTREG] + pop \REFREG + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_PopThenDone_\REFREG): + // Restore original DESTREG value from the stack. + pop \DESTREG + +LOCAL_LABEL(\BASENAME\()_UpdateShadowHeap_Done_\REFREG): + +.endm + +#else // WRITE_BARRIER_CHECK + +.macro UPDATE_GC_SHADOW BASENAME, REFREG, DESTREG +.endm + +#endif // WRITE_BARRIER_CHECK + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +// name of the register that points to the location to be updated and the name of the register that holds the +// object reference (this should be in upper case as it's used in the definition of the name of the helper). +.macro DEFINE_UNCHECKED_WRITE_BARRIER_CORE BASENAME, REFREG + + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW \BASENAME, \REFREG, r0 + + // If the reference is to an object that's not in an ephemeral generation we have no need to track it + // (since the object won't be collected or moved by an ephemeral collection). 
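+
+    // As a rough C sketch of the ephemeral-range filter and card table update that
+    // follow (dst/ref stand for the destination address in r0 and the reference in
+    // REFREG; the globals and LOG2_CLUMP_SIZE are the same ones this file imports):
+    //
+    //   if (ref < g_ephemeral_low || ref > g_ephemeral_high)
+    //       return;                                   // nothing to track
+    //   uint8_t* card = g_card_table + ((size_t)dst >> LOG2_CLUMP_SIZE);
+    //   if (*card != 0xFF)                            // skip redundant card writes
+    //       *card = 0xFF;
+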
+ ldr r12, =C_FUNC(g_ephemeral_low) + ldr r12, [r12] + cmp \REFREG, r12 + blo LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG) + + ldr r12, =C_FUNC(g_ephemeral_high) + ldr r12, [r12] + cmp \REFREG, r12 + bhi LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG) + + // We have a location on the GC heap being updated with a reference to an ephemeral object so we must + // track this write. The location address is translated into an offset in the card table bitmap. We set + // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + // the byte if it hasn't already been done since writes are expensive and impact scaling. + ldr r12, =C_FUNC(g_card_table) + ldr r12, [r12] + add r0, r12, r0, lsr #LOG2_CLUMP_SIZE + ldrb r12, [r0] + cmp r12, #0x0FF + bne LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG) + +LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG): + b LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG) + +// We get here if it's necessary to update the card table. +LOCAL_LABEL(\BASENAME\()_UpdateCardTable_\REFREG): + mov r12, #0x0FF + strb r12, [r0] + +LOCAL_LABEL(\BASENAME\()_EXIT_\REFREG): + +.endm + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. One argument is taken, the +// name of the register that will hold the object reference (this should be in upper case as it's used in the +// definition of the name of the helper). +.macro DEFINE_UNCHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME + +// Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard +// decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that +// location is in one of the other general registers determined by the value of REFREG. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at WriteBarrierFunctionAvLOC +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpAssignRef\EXPORT_REG_NAME, _TEXT + +// Export the canonical write barrier under unqualified name as well +.ifc \REFREG, r1 +ALTERNATE_ENTRY RhpAssignRef +.endif + + // Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The + // intent is that writes to the target object ($REFREG) will be visible across all CPUs before the + // write to the destination ($DESTREG). This covers most of the common scenarios where the programmer + // might assume strongly ordered accessess, namely where the preceding writes are used to initialize + // the object and the final write, made by this barrier in the instruction following the DMB, + // publishes that object for other threads/cpus to see. + // + // Note that none of this is relevant for single cpu machines. We may choose to implement a + // uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again. + dmb + + // Write the reference into the location. Note that we rely on the fact that no GC can occur between here + // and the card table update we may perform below. 
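+
+    // In C++ terms, the dmb above plus the plain store below act roughly like a
+    // release store (a sketch of the intended ordering, not the actual runtime code):
+    //
+    //   std::atomic_thread_fence(std::memory_order_release);   // the dmb
+    //   *dst = ref;                                             // the str below
+    //   // ...ephemeral check and card table update follow...
+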
+ALTERNATE_ENTRY "RhpAssignRefAvLocation"\EXPORT_REG_NAME // WriteBarrierFunctionAvLocation +.ifc \REFREG, r1 +ALTERNATE_ENTRY RhpAssignRefAVLocation +.endif + str \REFREG, [r0] + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE RhpAssignRef, \REFREG + + bx lr +LEAF_END RhpAssignRef\EXPORT_REG_NAME, _TEXT +.endm + +// One day we might have write barriers for all the possible argument registers but for now we have +// just one write barrier that assumes the input register is RSI. +DEFINE_UNCHECKED_WRITE_BARRIER r1, r1 + +// +// Define the helpers used to implement the write barrier required when writing an object reference into a +// location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +// non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +// collection. +// + +.macro DEFINE_CHECKED_WRITE_BARRIER_CORE BASENAME, REFREG + + // The location being updated might not even lie in the GC heap (a handle or stack location for instance), + // in which case no write barrier is required. + ldr r12, =C_FUNC(g_lowest_address) + ldr r12, [r12] + cmp r0, r12 + blo LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + ldr r12, =C_FUNC(g_highest_address) + ldr r12, [r12] + cmp r0, r12 + bhi LOCAL_LABEL(\BASENAME\()_NoBarrierRequired_\REFREG) + + DEFINE_UNCHECKED_WRITE_BARRIER_CORE \BASENAME, \REFREG + +.endm + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. One argument is taken, the +// name of the register that will hold the object reference (this should be in upper case as it's used in the +// definition of the name of the helper). +.macro DEFINE_CHECKED_WRITE_BARRIER REFREG, EXPORT_REG_NAME + +// Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard +// decoration). The location to be updated is always in R0. The object reference that will be assigned into +// that location is in one of the other general registers determined by the value of REFREG. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +LEAF_ENTRY RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT + +// Export the canonical write barrier under unqualified name as well +.ifc \REFREG, r1 +ALTERNATE_ENTRY RhpCheckedAssignRef +.endif + + // Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The + // intent is that writes to the target object ($REFREG) will be visible across all CPUs before the + // write to the destination ($DESTREG). This covers most of the common scenarios where the programmer + // might assume strongly ordered accessess, namely where the preceding writes are used to initialize + // the object and the final write, made by this barrier in the instruction following the DMB, + // publishes that object for other threads/cpus to see. + // + // Note that none of this is relevant for single cpu machines. We may choose to implement a + // uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again. + dmb + // Write the reference into the location. 
Note that we rely on the fact that no GC can occur between here + // and the card table update we may perform below. +ALTERNATE_ENTRY "RhpCheckedAssignRefAvLocation"\EXPORT_REG_NAME // WriteBarrierFunctionAvLocation +.ifc \REFREG, r1 +ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation +.endif + str \REFREG, [r0] + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, \REFREG + + bx lr +LEAF_END RhpCheckedAssignRef\EXPORT_REG_NAME, _TEXT +.endm + +// One day we might have write barriers for all the possible argument registers but for now we have +// just one write barrier that assumes the input register is RSI. +DEFINE_CHECKED_WRITE_BARRIER r1, r1 + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// r0 = destination address +// r1 = value +// r2 = comparand +LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT + // To implement our chosen memory model for ARM we insert a memory barrier at GC write brriers. This + // barrier must occur before the object reference update, so we have to do it unconditionally even + // though the update may fail below. + dmb +ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation +LOCAL_LABEL(RhpCheckedLockCmpXchgRetry): + ldrex r3, [r0] + cmp r2, r3 + bne LOCAL_LABEL(RhpCheckedLockCmpXchg_NoBarrierRequired_r1) + strex r3, r1, [r0] + cmp r3, #0 + bne LOCAL_LABEL(RhpCheckedLockCmpXchgRetry) + mov r3, r2 + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, r1 + + mov r0, r3 + bx lr +LEAF_END RhpCheckedLockCmpXchg, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// r0 = destination address +// r1 = value +LEAF_ENTRY RhpCheckedXchg, _TEXT + // To implement our chosen memory model for ARM we insert a memory barrier at GC write barriers. This + // barrier must occur before the object reference update. + dmb +ALTERNATE_ENTRY RhpCheckedXchgAVLocation +LOCAL_LABEL(RhpCheckedXchgRetry): + ldrex r2, [r0] + strex r3, r1, [r0] + cmp r3, #0 + bne LOCAL_LABEL(RhpCheckedXchgRetry) + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, r1 + + // The original value is currently in r2. We need to return it in r0. + mov r0, r2 + + bx lr +LEAF_END RhpCheckedXchg, _TEXT + +// +// RhpByRefAssignRef simulates movs instruction for object references. +// +// On entry: +// r0: address of ref-field (assigned to) +// r1: address of the data (source) +// r2, r3: be trashed +// +// On exit: +// r0, r1 are incremented by 4, +// r2, r3: trashed +// +LEAF_ENTRY RhpByRefAssignRef, _TEXT + ldr r2, [r1] + str r2, [r0] + + // Check whether the writes were even into the heap. If not there's no card update required. + ldr r3, =C_FUNC(g_lowest_address) + ldr r3, [r3] + cmp r0, r3 + blo LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + ldr r3, =C_FUNC(g_highest_address) + ldr r3, [r3] + cmp r0, r3 + bhi LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we're in a debug build and write barrier checking has been enabled). 
+ UPDATE_GC_SHADOW BASENAME, r2, r0 + + // If the reference is to an object that's not in an ephemeral generation we have no need to track it + // (since the object won't be collected or moved by an ephemeral collection). + ldr r3, =C_FUNC(g_ephemeral_low) + ldr r3, [r3] + cmp r2, r3 + blo LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + ldr r3, =C_FUNC(g_ephemeral_high) + ldr r3, [r3] + cmp r2, r3 + bhi LOCAL_LABEL(RhpByRefAssignRef_NotInHeap) + + // move current r0 value into r2 and then increment the pointers + mov r2, r0 + add r1, #4 + add r0, #4 + + // We have a location on the GC heap being updated with a reference to an ephemeral object so we must + // track this write. The location address is translated into an offset in the card table bitmap. We set + // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + // the byte if it hasn't already been done since writes are expensive and impact scaling. + ldr r3, =C_FUNC(g_card_table) + ldr r3, [r3] + add r2, r3, r2, lsr #LOG2_CLUMP_SIZE + ldrb r3, [r2] + cmp r3, #0x0FF + bne LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable) + bx lr + +// We get here if it's necessary to update the card table. +LOCAL_LABEL(RhpByRefAssignRef_UpdateCardTable): + mov r3, #0x0FF + strb r3, [r2] + bx lr + +LOCAL_LABEL(RhpByRefAssignRef_NotInHeap): + // Increment the pointers before leaving + add r0, #4 + add r1, #4 + bx lr +LEAF_END RhpByRefAssignRef, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.asm b/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.asm new file mode 100644 index 0000000000000..48b3ab83d6562 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm/WriteBarriers.asm @@ -0,0 +1,421 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; +;; Define the helpers used to implement the write barrier required when writing an object reference into a +;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +;; collection. +;; + +#include "AsmMacros.h" + + TEXTAREA + +;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used +;; during garbage collections to verify that object references where never written to the heap without using a +;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing +;; new references to the real heap. Since this can't be solved perfectly without critical sections around the +;; entire update process, we instead update the shadow location and then re-check the real location (as two +;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value +;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. 
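+;;
+;; A rough C sketch of that shadow update (dst is the heap location being updated and
+;; ref the reference just stored; the globals match the GC globals referenced in this
+;; file, INVALIDGCVALUE is the sentinel described above, and this is illustrative
+;; rather than the exact register-level sequence):
+;;
+;;   if (g_GCShadow != NULL && (uint8_t*)dst >= g_lowest_address)
+;;   {
+;;       void** shadow = (void**)(g_GCShadow + ((uint8_t*)dst - g_lowest_address));
+;;       if (shadow < (void**)g_GCShadowEnd)
+;;       {
+;;           *shadow = ref;
+;;           MemoryBarrier();            // the dmb: order the shadow write before the re-read
+;;           if (*dst != ref)            // a racing writer updated the real heap first
+;;               *shadow = (void*)INVALIDGCVALUE;
+;;       }
+;;   }
+;;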
+#ifdef WRITE_BARRIER_CHECK + + SETALIAS g_GCShadow, ?g_GCShadow@@3PAEA + SETALIAS g_GCShadowEnd, ?g_GCShadowEnd@@3PAEA + + EXTERN $g_GCShadow + EXTERN $g_GCShadowEnd + + MACRO + ;; On entry: + ;; $DESTREG: location to be updated + ;; $REFREG: objectref to be stored + ;; + ;; On exit: + ;; r12: trashed + ;; other registers are preserved + ;; + UPDATE_GC_SHADOW $DESTREG, $REFREG + + ;; If g_GCShadow is 0, don't perform the check. + ldr r12, =$g_GCShadow + ldr r12, [r12] + cmp r12, 0 + beq %ft1 + + ;; Save $DESTREG since we're about to modify it (and we need the original value both within the macro and + ;; once we exit the macro). Note that this is naughty since we're altering the stack pointer outside of + ;; the prolog inside a method without a frame. But given that this is only debug code and generally we + ;; shouldn't be walking the stack at this point it seems preferable to recoding the all the barrier + ;; variants to set up frames. The compiler knows exactly which registers are trashed in the simple write + ;; barrier case, so we don't have any more scratch registers to play with (and doing so would only make + ;; things harder if at a later stage we want to allow multiple barrier versions based on the input + ;; registers). + push $DESTREG + + ;; Transform $DESTREG into the equivalent address in the shadow heap. + ldr r12, =$G_LOWEST_ADDRESS + ldr r12, [r12] + subs $DESTREG, r12 + blo %ft0 + + ldr r12, =$g_GCShadow + ldr r12, [r12] + add $DESTREG, r12 + + ldr r12, =$g_GCShadowEnd + ldr r12, [r12] + cmp $DESTREG, r12 + bhs %ft0 + + ;; Update the shadow heap. + str $REFREG, [$DESTREG] + + ;; The following read must be strongly ordered wrt to the write we've just performed in order to + ;; prevent race conditions. + dmb + + ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. + ldr r12, [sp] + ldr r12, [r12] + cmp r12, $REFREG + beq %ft0 + + ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't + ;; guarantee whose shadow update won. + movw r12, #0xcccd + movt r12, #0xcccc + str r12, [$DESTREG] + +0 + ;; Restore original $DESTREG value from the stack. + pop $DESTREG + +1 + MEND + +#else // WRITE_BARRIER_CHECK + + MACRO + UPDATE_GC_SHADOW $DESTREG, $REFREG + MEND + +#endif // WRITE_BARRIER_CHECK + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +;; name of the register that points to the location to be updated and the name of the register that holds the +;; object reference (this should be in upper case as it's used in the definition of the name of the helper). + +;; Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for +;; some interlocked helpers that need an inline barrier. + MACRO + ;; On entry: + ;; $DESTREG: location to be updated + ;; $REFREG: objectref to be stored + ;; + ;; On exit: + ;; $DESTREG, r12: trashed + ;; other registers are preserved + ;; + INSERT_CHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG + + ;; The location being updated might not even lie in the GC heap (a handle or stack location for + ;; instance), in which case no write barrier is required. 
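+
+        ;; Roughly, in C (a sketch; dst stands for $DESTREG and the two globals are the
+        ;; GC heap bounds referenced in this file):
+        ;;
+        ;;   if ((uint8_t*)dst < g_lowest_address || (uint8_t*)dst >= g_highest_address)
+        ;;       return;        // not a GC heap location, no barrier needed
+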
+ ldr r12, =$G_LOWEST_ADDRESS + ldr r12, [r12] + cmp $DESTREG, r12 + blo %ft0 + ldr r12, =$G_HIGHEST_ADDRESS + ldr r12, [r12] + cmp $DESTREG, r12 + bhs %ft0 + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW $DESTREG, $REFREG + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + ldr r12, =$G_EPHEMERAL_LOW + ldr r12, [r12] + cmp $REFREG, r12 + blo %ft0 + ldr r12, =$G_EPHEMERAL_HIGH + ldr r12, [r12] + cmp $REFREG, r12 + bhs %ft0 + + ;; All tests pass, so update the card table. + ldr r12, =$G_CARD_TABLE + ldr r12, [r12] + add r12, r12, $DESTREG, lsr #10 + + ;; Check that this card hasn't already been written. Avoiding useless writes is a big win on + ;; multi-proc systems since it avoids cache thrashing. + ldrb $DESTREG, [r12] + cmp $DESTREG, #0xFF + beq %ft0 + mov $DESTREG, #0xFF + strb $DESTREG, [r12] + +0 + + MEND + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +;; name of the register that points to the location to be updated and the name of the register that holds the +;; object reference (this should be in upper case as it's used in the definition of the name of the helper). + +;; Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for +;; some interlocked helpers that need an inline barrier. + MACRO + ;; On entry: + ;; $DESTREG: location to be updated + ;; $REFREG: objectref to be stored + ;; + ;; On exit: + ;; $DESTREG, r12: trashed + ;; other registers are preserved + ;; + INSERT_UNCHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW $DESTREG, $REFREG + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + ldr r12, =$G_EPHEMERAL_LOW + ldr r12, [r12] + cmp $REFREG, r12 + blo %ft0 + ldr r12, =$G_EPHEMERAL_HIGH + ldr r12, [r12] + cmp $REFREG, r12 + bhs %ft0 + + ;; All tests pass, so update the card table. + ldr r12, =$G_CARD_TABLE + ldr r12, [r12] + add r12, r12, $DESTREG, lsr #10 + + ;; Check that this card hasn't already been written. Avoiding useless writes is a big win on + ;; multi-proc systems since it avoids cache thrashing. + ldrb $DESTREG, [r12] + cmp $DESTREG, #0xFF + beq %ft0 + mov $DESTREG, #0xFF + strb $DESTREG, [r12] + +0 + + MEND + + MACRO + ;; Define a helper with a name of the form RhpCheckedAssignRefR0 etc. The location to be updated is in + ;; $DESTREG. The object reference that will be assigned into that location is in one of the other + ;; general registers determined by the value of $REFREG. R12 is used as a scratch register. 
+ + ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: + ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at WriteBarrierFunctionAvLocation + ;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + + DEFINE_CHECKED_WRITE_BARRIER $DESTREG, $REFREG + + gbls WriteBarrierFunction + gbls WriteBarrierFunctionAvLocation +WriteBarrierFunction SETS "RhpCheckedAssignRef":cc:"$REFREG" +WriteBarrierFunctionAvLocation SETS "RhpCheckedAssignRefAvLocation":cc:"$REFREG" + + EXPORT $WriteBarrierFunction +$WriteBarrierFunction + + ;; Export the canonical write barrier under unqualified name as well + IF "$REFREG" == "R1" + ALTERNATE_ENTRY RhpCheckedAssignRef + ENDIF + + ;; Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The + ;; intent is that writes to the target object ($REFREG) will be visible across all CPUs before the + ;; write to the destination ($DESTREG). This covers most of the common scenarios where the programmer + ;; might assume strongly ordered accessess, namely where the preceding writes are used to initialize + ;; the object and the final write, made by this barrier in the instruction following the DMB, + ;; publishes that object for other threads/cpus to see. + ;; + ;; Note that none of this is relevant for single cpu machines. We may choose to implement a + ;; uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again. + dmb + + ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between + ;; here and the card table update we may perform below. + ALTERNATE_ENTRY $WriteBarrierFunctionAvLocation + IF "$REFREG" == "R1" + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + ENDIF + str $REFREG, [$DESTREG] + + INSERT_CHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG + + bx lr + + MEND + + + MACRO + ;; Define a helper with a name of the form RhpAssignRefR0 etc. The location to be updated is in + ;; $DESTREG. The object reference that will be assigned into that location is in one of the other + ;; general registers determined by the value of $REFREG. R12 is used as a scratch register. + + ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: + ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at WriteBarrierFunctionAvLocation + ;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + + DEFINE_UNCHECKED_WRITE_BARRIER $DESTREG, $REFREG + + gbls WriteBarrierFunction + gbls WriteBarrierFunctionAvLocation +WriteBarrierFunction SETS "RhpAssignRef":cc:"$REFREG" +WriteBarrierFunctionAvLocation SETS "RhpAssignRefAvLocation":cc:"$REFREG" + + ;; Export the canonical write barrier under unqualified name as well + IF "$REFREG" == "R1" + ALTERNATE_ENTRY RhpAssignRef + ENDIF + + EXPORT $WriteBarrierFunction +$WriteBarrierFunction + + ;; Use the GC write barrier as a convenient place to implement the managed memory model for ARM. The + ;; intent is that writes to the target object ($REFREG) will be visible across all CPUs before the + ;; write to the destination ($DESTREG). 
This covers most of the common scenarios where the programmer + ;; might assume strongly ordered accessess, namely where the preceding writes are used to initialize + ;; the object and the final write, made by this barrier in the instruction following the DMB, + ;; publishes that object for other threads/cpus to see. + ;; + ;; Note that none of this is relevant for single cpu machines. We may choose to implement a + ;; uniprocessor specific version of this barrier if uni-proc becomes a significant scenario again. + dmb + + ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between + ;; here and the card table update we may perform below. + ALTERNATE_ENTRY $WriteBarrierFunctionAvLocation + IF "$REFREG" == "R1" + ALTERNATE_ENTRY RhpAssignRefAVLocation + ENDIF + str $REFREG, [$DESTREG] + + INSERT_UNCHECKED_WRITE_BARRIER_CORE $DESTREG, $REFREG + + bx lr + + MEND + +;; One day we might have write barriers for all the possible argument registers but for now we have +;; just one write barrier that assumes the input register is R1. + DEFINE_CHECKED_WRITE_BARRIER R0, R1 + + DEFINE_UNCHECKED_WRITE_BARRIER R0, R1 + +;; Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon +;; successful updates. + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + + ;; Interlocked compare exchange on objectref. + ;; + ;; On entry: + ;; r0: pointer to objectref + ;; r1: exchange value + ;; r2: comparand + ;; + ;; On exit: + ;; r0: original value of objectref + ;; r1,r2,r3,r12: trashed + ;; + LEAF_ENTRY RhpCheckedLockCmpXchg + + ;; To implement our chosen memory model for ARM we insert a memory barrier at GC write barriers. This + ;; barrier must occur before the object reference update, so we have to do it unconditionally even + ;; though the update may fail below. + dmb + +CX_Retry + ;; Check location value is what we expect. + ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + ldrex r3, [r0] + cmp r2, r3 + bne CX_NoUpdate + + ;; Current value matches comparand, attempt to update with the new value. + strex r3, r1, [r0] + cmp r3, #0 + bne CX_Retry ; Retry the operation if another write beat us there + + ;; We've successfully updated the value of the objectref so now we need a GC write barrier. + ;; The following barrier code takes the destination in r0 and the value in r1 so the arguments are + ;; already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE r0, r1 + + ;; The original value was equal to the comparand which is still in r2 so we can return that. + mov r0, r2 + bx lr + +CX_NoUpdate + ;; Location value didn't match comparand, return that value. + mov r0, r3 + bx lr + + LEAF_END RhpCheckedLockCmpXchg + + ;; Interlocked exchange on objectref. 
+ ;; + ;; On entry: + ;; r0: pointer to objectref + ;; r1: exchange value + ;; + ;; On exit: + ;; r0: original value of objectref + ;; r1,r2,r3,r12: trashed + ;; + + ;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: + ;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation + ;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + + LEAF_ENTRY RhpCheckedXchg + + ;; To implement our chosen memory model for ARM we insert a memory barrier at GC write barriers. This + ;; barrier must occur before the object reference update. + dmb + +X_Retry + ALTERNATE_ENTRY RhpCheckedXchgAVLocation + ;; Read the original contents of the location. + ldrex r2, [r0] + + ;; Attempt to update with the new value. + strex r3, r1, [r0] + cmp r3, #0 + bne X_Retry ; Retry the operation if another write beat us there + + ;; We've successfully updated the value of the objectref so now we need a GC write barrier. + ;; The following barrier code takes the destination in r0 and the value in r1 so the arguments are + ;; already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE r0, r1 + + ;; The original value is currently in r2. We need to return it in r0. + mov r0, r2 + bx lr + + LEAF_END RhpCheckedXchg + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.S b/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.asm b/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.asm new file mode 100644 index 0000000000000..6571bda58c401 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/AllocFast.asm @@ -0,0 +1,290 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +;; allocation context then automatically fallback to the slow allocation path. +;; x0 == EEType + LEAF_ENTRY RhpNewFast + + ;; x1 = GetThread(), TRASHES x2 + INLINE_GETTHREAD x1, x2 + + ;; + ;; x0 contains EEType pointer + ;; + ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; x0: EEType pointer + ;; x1: Thread pointer + ;; x2: base size + ;; + + ;; Load potential new object address into x12. + ldr x12, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x13 + bhi RhpNewFast_RarePath + + ;; Update the alloc pointer to account for the allocation. + str x2, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer + str x0, [x12, #OFFSETOF__Object__m_pEEType] + + mov x0, x12 + ret + +RhpNewFast_RarePath + mov x1, #0 + b RhpNewObject + LEAF_END RhpNewFast + + INLINE_GETTHREAD_CONSTANT_POOL + +;; Allocate non-array object with finalizer. 
+;; x0 == EEType + LEAF_ENTRY RhpNewFinalizable + mov x1, #GC_ALLOC_FINALIZE + b RhpNewObject + LEAF_END RhpNewFinalizable + +;; Allocate non-array object. +;; x0 == EEType +;; x1 == alloc flags + NESTED_ENTRY RhpNewObject + + PUSH_COOP_PINVOKE_FRAME x3 + + ;; x3: transition frame + + ;; Preserve the EEType in x19 + mov x19, x0 + + ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize] + + ;; Call the rest of the allocation helper. + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + bl RhpGcAlloc + + ;; Set the new object's EEType pointer on success. + cbz x0, NewOutOfMemory + str x19, [x0, #OFFSETOF__Object__m_pEEType] + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + ldr w1, [x19, #OFFSETOF__EEType__m_uBaseSize] + movk x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16 + cmp x1, x2 + blo New_SkipPublish + + ;; x0: object + ;; x1: already contains object size + bl RhpPublishObject ;; x0: this function returns the object that was passed-in + +New_SkipPublish + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NewOutOfMemory + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov x0, x19 ; EEType pointer + mov x1, 0 ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + EPILOG_NOP b RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewObject + +;; Allocate a string. +;; x0 == EEType +;; x1 == element/character count + LEAF_ENTRY RhNewString + ;; Make sure computing the overall allocation size won't overflow + ;; TODO: this should be actually MAX_STRING_LENGTH + mov x2, 0x7FFFFFFF + cmp x1, x2 + bhi StringSizeOverflow + + ;; Compute overall allocation size (align(base size + (element size * elements), 8)). + mov w2, #STRING_COMPONENT_SIZE + mov x3, #(STRING_BASE_SIZE + 7) + umaddl x2, w1, w2, x3 ; x2 = w1 * w2 + x3 + and x2, x2, #-8 + + ; x0 == EEType + ; x1 == element count + ; x2 == string size + + INLINE_GETTHREAD x3, x5 + + ;; Load potential new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x12 + bhi RhpNewArrayRare + + ;; Reload new object address into r12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Update the alloc pointer to account for the allocation. + str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer and element count. + str x0, [x12, #OFFSETOF__Object__m_pEEType] + str x1, [x12, #OFFSETOF__Array__m_Length] + + ;; Return the object allocated in x0. + mov x0, x12 + + ret + +StringSizeOverflow + ; We get here if the length of the final string object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an OOM exception that the caller of this allocator understands. + + ; x0 holds EEType pointer already + mov x1, #1 ; Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + LEAF_END RhNewString + + INLINE_GETTHREAD_CONSTANT_POOL + + +;; Allocate one dimensional, zero based array (SZARRAY). 
+;; x0 == EEType +;; x1 == element count + LEAF_ENTRY RhpNewArray + + ;; We want to limit the element count to the non-negative 32-bit int range. + ;; If the element count is <= 0x7FFFFFFF, no overflow is possible because the component + ;; size is <= 0xffff (it's an unsigned 16-bit value), and the base size for the worst + ;; case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. + mov x2, #0x7FFFFFFF + cmp x1, x2 + bhi ArraySizeOverflow + + ldrh w2, [x0, #OFFSETOF__EEType__m_usComponentSize] + umull x2, w1, w2 + ldr w3, [x0, #OFFSETOF__EEType__m_uBaseSize] + add x2, x2, x3 + add x2, x2, #7 + and x2, x2, #-8 + + ; x0 == EEType + ; x1 == element count + ; x2 == array size + + INLINE_GETTHREAD x3, x5 + + ;; Load potential new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x12 + bhi RhpNewArrayRare + + ;; Reload new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Update the alloc pointer to account for the allocation. + str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + ;; Set the new object's EEType pointer and element count. + str x0, [x12, #OFFSETOF__Object__m_pEEType] + str x1, [x12, #OFFSETOF__Array__m_Length] + + ;; Return the object allocated in r0. + mov x0, x12 + + ret + +ArraySizeOverflow + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. + + ; x0 holds EEType pointer already + mov x1, #1 ; Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + LEAF_END RhpNewArray + + INLINE_GETTHREAD_CONSTANT_POOL + +;; Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. +;; x0 == EEType +;; x1 == element count +;; x2 == array size + Thread::m_alloc_context::alloc_ptr +;; x3 == Thread + NESTED_ENTRY RhpNewArrayRare + + ; Recover array size by subtracting the alloc_ptr from x2. + PROLOG_NOP ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + PROLOG_NOP sub x2, x2, x12 + + PUSH_COOP_PINVOKE_FRAME x3 + + ; Preserve data we'll need later into the callee saved registers + mov x19, x0 ; Preserve EEType + mov x20, x1 ; Preserve element count + mov x21, x2 ; Preserve array size + + mov x1, #0 + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + bl RhpGcAlloc + + ; Set the new object's EEType pointer and length on success. + cbz x0, ArrayOutOfMemory + + ; Success, set the array's type and element count in the new object. + str x19, [x0, #OFFSETOF__Object__m_pEEType] + str x20, [x0, #OFFSETOF__Array__m_Length] + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movk x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16 + cmp x21, x2 + blo NewArray_SkipPublish + + ;; x0 = newly allocated array. x1 = size + mov x1, x21 + bl RhpPublishObject + +NewArray_SkipPublish + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +ArrayOutOfMemory + ;; This is the OOM failure path. 
We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov x0, x19 ; EEType Pointer + mov x1, 0 ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + EPILOG_NOP b RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewArrayRare + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/AsmMacros.h b/src/coreclr/src/nativeaot/Runtime/arm64/AsmMacros.h new file mode 100644 index 0000000000000..950d8befc6ab0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/AsmMacros.h @@ -0,0 +1,316 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; OS provided macros +#include +;; generated by the build from AsmOffsets.cpp +#include "AsmOffsets.inc" + +;; +;; CONSTANTS -- INTEGER +;; +TSF_Attached equ 0x01 +TSF_SuppressGcStress equ 0x08 +TSF_DoNotTriggerGc equ 0x10 +TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC equ 0x18 + +;; Bit position for the flags above, to be used with tbz/tbnz instructions +TSF_Attached_Bit equ 0 +TSF_SuppressGcStress_Bit equ 3 +TSF_DoNotTriggerGc_Bit equ 4 + +;; GC type flags +GC_ALLOC_FINALIZE equ 1 +GC_ALLOC_ALIGN8_BIAS equ 4 +GC_ALLOC_ALIGN8 equ 8 + +;; Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h +PTFF_SAVE_X19 equ 0x00000001 +PTFF_SAVE_X20 equ 0x00000002 +PTFF_SAVE_X21 equ 0x00000004 +PTFF_SAVE_X22 equ 0x00000008 +PTFF_SAVE_X23 equ 0x00000010 +PTFF_SAVE_X24 equ 0x00000020 +PTFF_SAVE_X25 equ 0x00000040 +PTFF_SAVE_X26 equ 0x00000080 +PTFF_SAVE_X27 equ 0x00000100 +PTFF_SAVE_X28 equ 0x00000200 +PTFF_SAVE_SP equ 0x00000400 +PTFF_SAVE_ALL_PRESERVED equ 0x000003FF ;; NOTE: x19-x28 +PTFF_SAVE_X0 equ 0x00000800 +PTFF_SAVE_X1 equ 0x00001000 +PTFF_SAVE_X2 equ 0x00002000 +PTFF_SAVE_X3 equ 0x00004000 +PTFF_SAVE_X4 equ 0x00008000 +PTFF_SAVE_X5 equ 0x00010000 +PTFF_SAVE_X6 equ 0x00020000 +PTFF_SAVE_X7 equ 0x00040000 +PTFF_SAVE_X8 equ 0x00080000 +PTFF_SAVE_X9 equ 0x00100000 +PTFF_SAVE_X10 equ 0x00200000 +PTFF_SAVE_X11 equ 0x00400000 +PTFF_SAVE_X12 equ 0x00800000 +PTFF_SAVE_X13 equ 0x01000000 +PTFF_SAVE_X14 equ 0x02000000 +PTFF_SAVE_X15 equ 0x04000000 +PTFF_SAVE_X16 equ 0x08000000 +PTFF_SAVE_X17 equ 0x10000000 +PTFF_SAVE_X18 equ 0x20000000 +PTFF_SAVE_ALL_SCRATCH equ 0x3FFFF800 ;; NOTE: X0-X18 +PTFF_SAVE_FP equ 0x40000000 +PTFF_SAVE_LR equ 0x80000000 + +;; NOTE: The following flags represent the upper 32 bits of the PInvokeTransitionFrameFlags. 
+;; Since the assembler doesn't support 64 bit constants in any way, we need to define just +;; the upper bits here +PTFF_X0_IS_GCREF_HI equ 0x00000001 ;; iff PTFF_SAVE_X0 : set->x0 is Object, clear->x0 is scalar +PTFF_X0_IS_BYREF_HI equ 0x00000002 ;; iff PTFF_SAVE_X0 : set->x0 is ByRef, clear->x0 is Object or scalar +PTFF_X1_IS_GCREF_HI equ 0x00000004 ;; iff PTFF_SAVE_X1 : set->x1 is Object, clear->x1 is scalar +PTFF_X1_IS_BYREF_HI equ 0x00000008 ;; iff PTFF_SAVE_X1 : set->x1 is ByRef, clear->x1 is Object or scalar +PTFF_THREAD_ABORT_HI equ 0x00000010 ;; indicates that ThreadAbortException should be thrown when returning from the transition + +;; Bit position for the flags above, to be used with tbz / tbnz instructions +PTFF_THREAD_ABORT_BIT equ 36 + +;; These must match the TrapThreadsFlags enum +TrapThreadsFlags_None equ 0 +TrapThreadsFlags_AbortInProgress equ 1 +TrapThreadsFlags_TrapThreads equ 2 + +;; Bit position for the flags above, to be used with tbz / tbnz instructions +TrapThreadsFlags_AbortInProgress_Bit equ 0 +TrapThreadsFlags_TrapThreads_Bit equ 1 + +;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +STATUS_REDHAWK_THREAD_ABORT equ 0x43 + +;; +;; Rename fields of nested structs +;; +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + +;; +;; IMPORTS +;; + EXTERN RhpGcAlloc + EXTERN RhpPublishObject + EXTERN RhExceptionHandling_FailedAllocation + EXTERN RhDebugBreak + EXTERN RhpWaitForSuspend2 + EXTERN RhpWaitForGC2 + EXTERN RhpReversePInvokeAttachOrTrapThread2 + EXTERN RhpCalculateStackTraceWorker + EXTERN RhThrowHwEx + EXTERN RhThrowEx + EXTERN RhRethrow + + EXTERN RhpTrapThreads + EXTERN g_lowest_address + EXTERN g_highest_address + EXTERN g_ephemeral_low + EXTERN g_ephemeral_high + EXTERN g_card_table + + +;; ----------------------------------------------------------------------------- +;; Macro used to assign an alternate name to a symbol containing characters normally disallowed in a symbol +;; name (e.g. C++ decorated names). + MACRO + SETALIAS $name, $symbol + GBLS $name +$name SETS "|$symbol|" + MEND + +;;----------------------------------------------------------------------------- +;; Macro for loading a 64-bit constant by a minimal number of instructions +;; Since the asssembles doesn't support 64 bit arithmetics in expressions, +;; the value is passed in as lo, hi pair. 
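+;;
+;; For example, MOVL64 x2, 0x12345678, 0x9abcdef0 is intended to load the 64-bit
+;; value 0x9abcdef012345678 into x2 via a movz/movk sequence along the lines of:
+;;
+;;   movz x2, #0x9abc, lsl #48
+;;   movk x2, #0xdef0, lsl #32
+;;   movk x2, #0x1234, lsl #16
+;;   movk x2, #0x5678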
+ MACRO + MOVL64 $Reg, $ConstantLo, $ConstantHi + + LCLS MovInstr +MovInstr SETS "movz" + + IF ((($ConstantHi):SHR:16):AND:0xffff) != 0 + $MovInstr $Reg, #((($Constant):SHR:16):AND:0xffff), lsl #48 +MovInstr SETS "movk" + ENDIF + + IF (($ConstantHi):AND:0xffff) != 0 + $MovInstr $Reg, #(($ConstantHi):AND:0xffff), lsl #32 +MovInstr SETS "movk" + ENDIF + + IF ((($ConstantLo):SHR:16):AND:0xffff) != 0 + $MovInstr $Reg, #((($ConstantLo):SHR:16):AND:0xffff), lsl #16 +MovInstr SETS "movk" + ENDIF + + $MovInstr $Reg, #(($ConstantLo):AND:0xffff) + MEND + +;; ----------------------------------------------------------------------------- +;; +;; Macro to export a pointer to an address inside a stub as a 64-bit variable +;; + MACRO + EXPORT_POINTER_TO_ADDRESS $Name + LCLS CodeLbl +CodeLbl SETS "$Name":CC:"Lbl" +$CodeLbl + AREA | .rdata | , ALIGN = 8, DATA, READONLY +$Name + DCQ $CodeLbl + EXPORT $Name + TEXTAREA + ROUT + + MEND + +;; ----------------------------------------------------------------------------- +;; +;; Macro for indicating an alternate entry point into a function. +;; + + MACRO + LABELED_RETURN_ADDRESS $ReturnAddressName + + ; export the return address name, but do not perturb the code by forcing alignment +$ReturnAddressName + EXPORT $ReturnAddressName + + ; flush any pending literal pool stuff + ROUT + + MEND + +;; ----------------------------------------------------------------------------- +;; +;; Macro to get a pointer to the Thread* object for the currently executing thread +;; + +__tls_array equ 0x58 ;; offsetof(TEB, ThreadLocalStoragePointer) + + EXTERN _tls_index + + GBLS __SECTIONREL_tls_CurrentThread +__SECTIONREL_tls_CurrentThread SETS "SECTIONREL_tls_CurrentThread" + + MACRO + INLINE_GETTHREAD $destReg, $trashReg + + ;; The following macro variables are just some assembler magic to get the name of the 32-bit version + ;; of $trashReg. It does it by string manipulation. Replaces something like x3 with w3. + LCLS TrashRegister32Bit +TrashRegister32Bit SETS "$trashReg" +TrashRegister32Bit SETS "w":CC:("$TrashRegister32Bit":RIGHT:((:LEN:TrashRegister32Bit) - 1)) + + ldr $trashReg, =_tls_index + ldr $TrashRegister32Bit, [$trashReg] + ldr $destReg, [xpr, #__tls_array] + ldr $destReg, [$destReg, $trashReg lsl #3] + ldr $trashReg, =$__SECTIONREL_tls_CurrentThread + ldr $trashReg, [$trashReg] + add $destReg, $destReg, $trashReg + MEND + + ;; INLINE_GETTHREAD_CONSTANT_POOL macro has to be used after the last function in the .asm file that used + ;; INLINE_GETTHREAD. Optionally, it can be also used after any function that used INLINE_GETTHREAD + ;; to improve density, or to reduce distance betweeen the constant pool and its use. + MACRO + INLINE_GETTHREAD_CONSTANT_POOL + EXTERN tls_CurrentThread + + ;; Section relocs are 32 bits. Using an extra DCD initialized to zero for 8-byte alignment. 
+$__SECTIONREL_tls_CurrentThread + DCD tls_CurrentThread + RELOC 8, tls_CurrentThread ;; SECREL + DCD 0 + +__SECTIONREL_tls_CurrentThread SETS "$__SECTIONREL_tls_CurrentThread":CC:"_" + + MEND + + MACRO + INLINE_THREAD_UNHIJACK $threadReg, $trashReg1, $trashReg2 + ;; + ;; Thread::Unhijack() + ;; + ldr $trashReg1, [$threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz $trashReg1, %ft0 + + ldr $trashReg2, [$threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str $trashReg1, [$trashReg2] + str xzr, [$threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str xzr, [$threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] +0 + MEND + +;; ----------------------------------------------------------------------------- +;; +;; Macro used from unmanaged helpers called from managed code where the helper does not transition immediately +;; into pre-emptive mode but may cause a GC and thus requires the stack is crawlable. This is typically the +;; case for helpers that meddle in GC state (e.g. allocation helpers) where the code must remain in +;; cooperative mode since it handles object references and internal GC state directly but a garbage collection +;; may be inevitable. In these cases we need to be able to transition to pre-meptive mode deep within the +;; unmanaged code but still be able to initialize the stack iterator at the first stack frame which may hold +;; interesting GC references. In all our helper cases this corresponds to the most recent managed frame (e.g. +;; the helper's caller). +;; +;; This macro builds a frame describing the current state of managed code. +;; +;; INVARIANTS +;; - The macro assumes it defines the method prolog, it should typically be the first code in a method and +;; certainly appear before any attempt to alter the stack pointer. +;; - This macro uses trashReg (after its initial value has been saved in the frame) and upon exit trashReg +;; will contain the address of transition frame. +;; + +DEFAULT_FRAME_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP + + MACRO + PUSH_COOP_PINVOKE_FRAME $trashReg + + PROLOG_SAVE_REG_PAIR fp, lr, #-0x80! ;; Push down stack pointer and store FP and LR + + ;; 0x10 bytes reserved for Thread* and flags + + ;; Save callee saved registers + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + + ;; Save the value of SP before stack allocation to the last slot in the frame (slot #15) + add $trashReg, sp, #0x80 + str $trashReg, [sp, #0x70] + + ;; Record the bitmask of saved registers in the frame (slot #3) + mov $trashReg, #DEFAULT_FRAME_SAVE_FLAGS + str $trashReg, [sp, #0x18] + + mov $trashReg, sp + MEND + +;; Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME + MACRO + POP_COOP_PINVOKE_FRAME + + EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 + EPILOG_RESTORE_REG_PAIR fp, lr, #0x80! 
+ MEND + + +#ifdef FEATURE_GC_STRESS + SETALIAS THREAD__HIJACKFORGCSTRESS, ?HijackForGcStress@Thread@@SAXPEAUPAL_LIMITED_CONTEXT@@@Z + SETALIAS REDHAWKGCINTERFACE__STRESSGC, ?StressGc@RedhawkGCInterface@@SAXXZ + + EXTERN $REDHAWKGCINTERFACE__STRESSGC + EXTERN $THREAD__HIJACKFORGCSTRESS +#endif ;; FEATURE_GC_STRESS diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/AsmOffsetsCpu.h b/src/coreclr/src/nativeaot/Runtime/arm64/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..6e59ade597ad4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/AsmOffsetsCpu.h @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. +// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(290, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(10, ExInfo, m_exception) +PLAT_ASM_OFFSET(18, ExInfo, m_kind) +PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(288, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(268, StackFrameIterator) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(260, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(C0, PAL_LIMITED_CONTEXT) + +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, FP) +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, LR) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, X0) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, X1) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, X19) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, X20) +PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, X21) +PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, X22) +PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, X23) +PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, X24) +PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, X25) +PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, X26) +PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, X27) +PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, X28) +PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, SP) +PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_SIZEOF(150, REGDISPLAY) +PLAT_ASM_OFFSET(f8, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(98, REGDISPLAY, pX19) +PLAT_ASM_OFFSET(a0, REGDISPLAY, pX20) +PLAT_ASM_OFFSET(a8, REGDISPLAY, pX21) +PLAT_ASM_OFFSET(b0, REGDISPLAY, pX22) +PLAT_ASM_OFFSET(b8, REGDISPLAY, pX23) +PLAT_ASM_OFFSET(c0, REGDISPLAY, pX24) +PLAT_ASM_OFFSET(c8, REGDISPLAY, pX25) +PLAT_ASM_OFFSET(d0, REGDISPLAY, pX26) +PLAT_ASM_OFFSET(d8, REGDISPLAY, pX27) +PLAT_ASM_OFFSET(e0, REGDISPLAY, pX28) +PLAT_ASM_OFFSET(e8, REGDISPLAY, pFP) +PLAT_ASM_OFFSET(110, REGDISPLAY, D) diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.S b/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.asm b/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.asm new file mode 100644 index 0000000000000..2da05b7a0c538 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/CallDescrWorker.asm @@ -0,0 +1,143 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +;;----------------------------------------------------------------------------- +;; This helper routine enregisters the appropriate arguments and makes the +;; actual call. +;; +;; INPUT: x0: pointer to CallDescrData struct +;; +;;----------------------------------------------------------------------------- +;;void RhCallDescrWorker(CallDescrData * pCallDescrData); + NESTED_ENTRY RhCallDescrWorker + + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + PROLOG_SAVE_REG_PAIR x19, x20, #16 + + ;; Save the value of SP before we start pushing any arguments + mov x20, sp + + mov x19, x0 ; save pCallDescrData in x19 + + ldr w1, [x19, #OFFSETOF__CallDescrData__numStackSlots] + cbz w1, Ldonestack + + ;; Add frame padding to ensure frame size is a multiple of 16 (a requirement of the OS ABI). + ;; We push two registers (above) and numStackSlots arguments (below). If this comes to an odd number + ;; of slots we must pad with another. This simplifies to "if the low bit of numStackSlots is set, + ;; extend the stack another eight bytes". + ldr x0, [x19, #OFFSETOF__CallDescrData__pSrc] + add x0, x0, x1 lsl #3 ; pSrcEnd=pSrc+8*numStackSlots + ands x2, x1, #1 + beq Lstackloop + + ;; This loop copies numStackSlots words + ;; from [pSrcEnd-8,pSrcEnd-16,...] to [sp-8,sp-16,...] + + ;; Pad and store one stack slot as number of slots are odd + ldr x4, [x0,#-8]! + str x4, [sp,#-16]! + subs x1, x1, #1 + beq Ldonestack +Lstackloop + ldp x2, x4, [x0,#-16]! + stp x2, x4, [sp,#-16]! + subs x1, x1, #2 + bne Lstackloop +Ldonestack + + ;; If FP arguments are supplied in registers (x9 != NULL) then initialize all of them from the pointer + ;; given in x9. + ldr x9, [x19, #OFFSETOF__CallDescrData__pFloatArgumentRegisters] + cbz x9, LNoFloatingPoint + ldp d0, d1, [x9] + ldp d2, d3, [x9, #16] + ldp d4, d5, [x9, #32] + ldp d6, d7, [x9, #48] +LNoFloatingPoint + + ;; Copy [pArgumentRegisters, ..., pArgumentRegisters + 64] + ;; into x0, ..., x7, x8 + + ldr x9, [x19, #OFFSETOF__CallDescrData__pArgumentRegisters] + ldp x0, x1, [x9] + ldp x2, x3, [x9, #16] + ldp x4, x5, [x9, #32] + ldp x6, x7, [x9, #48] + ldr x8, [x9, #64] + + ;; call pTarget + ldr x9, [x19, #OFFSETOF__CallDescrData__pTarget] + blr x9 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + ;; Symbol used to identify thunk call to managed function so the special + ;; case unwinder can unwind through this function. Sadly we cannot directly + ;; export this symbol right now because it confuses DIA unwinder to believe + ;; it's the beginning of a new method, therefore we export the address + ;; of an auxiliary variable holding the address instead. 
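+
+;; For reference, a hedged C++ sketch of the structure this helper consumes. Only
+;; the member names come from the OFFSETOF__CallDescrData__* constants used in
+;; this routine; the ordering and exact types below are illustrative assumptions.
+;;
+;;     #include <cstdint>
+;;     struct CallDescrDataSketch
+;;     {
+;;         uint8_t*  pSrc;                     // stack arguments, numStackSlots * 8 bytes
+;;         uint32_t  numStackSlots;
+;;         uint32_t  fpReturnSize;             // 0 = integer, 4/8 = float/double, 16/32 = HFA
+;;         uint64_t* pArgumentRegisters;       // image of x0..x8 (9 slots)
+;;         double*   pFloatArgumentRegisters;  // image of d0..d7, may be null
+;;         void*     pTarget;                  // code address to call
+;;         void*     pReturnBuffer;            // receives the raw return value(s)
+;;     };
+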
+ + ldr w3, [x19, #OFFSETOF__CallDescrData__fpReturnSize] + + ;; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + ldr x19, [x19, #OFFSETOF__CallDescrData__pReturnBuffer] + + ;; Int return case + cbz w3, LIntReturn + + ;; Float return case + cmp w3, #4 + beq LFloatOrDoubleReturn + + ;; Double return case + cmp w3, #8 + bne LCheckHFAReturn + +LFloatOrDoubleReturn + str d0, [x19] + b LReturnDone + +LCheckHFAReturn + cmp w3, #16 + beq LFloatOrDoubleHFAReturn + cmp w3, #32 + beq LFloatOrDoubleHFAReturn + b LNoHFAReturn + +LFloatOrDoubleHFAReturn + ;;Single/Double HFAReturn return case + stp d0, d1, [x19, #00] + stp d2, d3, [x19, #16] + b LReturnDone + +LNoHFAReturn + + EMIT_BREAKPOINT ; Unreachable + +LIntReturn + ;; Save return value(s) into retbuf for int + stp x0, x1, [x19] + +LReturnDone + +#ifdef _DEBUG + ;; Trash the floating point registers to ensure that the HFA return values + ;; won't survive by accident + ldp d0, d1, [sp] + ldp d2, d3, [sp, #16] +#endif + ;; Restore the value of SP + mov sp, x20 + + EPILOG_RESTORE_REG_PAIR x19, x20, #16 + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + EPILOG_RETURN + + NESTED_END RhCallDescrWorker + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.S b/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.asm b/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.asm new file mode 100644 index 0000000000000..d826c1b908c87 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/CallingConventionConverterHelpers.asm @@ -0,0 +1,63 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "ksarm64.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; CallingConventionCoverter Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +POINTER_SIZE equ 0x08 + +;; +;; Note: The "__jmpstub__" prefix is used to indicate to debugger +;; that it must step-through this stub when it encounters it while +;; stepping. 
+;; + + ;; + ;; void CallingConventionConverter_ReturnThunk() + ;; + LEAF_ENTRY CallingConventionConverter_ReturnThunk + ret + LEAF_END CallingConventionConverter_ReturnThunk + + ;; + ;; __jmpstub__CallingConventionConverter_CommonCallingStub + ;; + ;; struct CallingConventionConverter_CommonCallingStub_PointerData + ;; { + ;; void *ManagedCallConverterThunk; + ;; void *UniversalThunk; + ;; } + ;; + ;; struct CommonCallingStubInputData + ;; { + ;; ULONG_PTR CallingConventionId; + ;; CallingConventionConverter_CommonCallingStub_PointerData *commonData; // Only the ManagedCallConverterThunk field is used + ;; // However, it is specified just like other platforms, so the behavior of the common + ;; // calling stub is easier to debug + ;; } + ;; + ;; xip0 - Points at CommonCallingStubInputData + ;; + ;; + LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub + ldr xip1, [xip0] ; put CallingConventionId into xip1 as "parameter" to universal transition thunk + ldr xip0, [xip0, #POINTER_SIZE] ; get pointer to CallingConventionConverter_CommonCallingStub_PointerData into xip0 + ldr x12, [xip0, #POINTER_SIZE] ; get address of UniversalTransitionThunk (which we'll tailcall to later) + ldr xip0, [xip0] ; get address of ManagedCallConverterThunk (target for universal thunk to call) + br x12 + LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub + + ;; + ;; void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonCallingStub) + ;; + LEAF_ENTRY CallingConventionConverter_GetStubs + ldr x12, =CallingConventionConverter_ReturnThunk + str x12, [x0] ;; ARM doesn't need different return thunks. + str x12, [x1] + ldr x12, =__jmpstub__CallingConventionConverter_CommonCallingStub + str x12, [x2] + ret + LEAF_END CallingConventionConverter_GetStubs + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/Dummies.asm b/src/coreclr/src/nativeaot/Runtime/arm64/Dummies.asm new file mode 100644 index 0000000000000..ea6c21fc810d0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/Dummies.asm @@ -0,0 +1,18 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + LEAF_ENTRY RhpLMod + DCW 0xdefe + bx lr + LEAF_END RhpLMod + + LEAF_ENTRY RhpLMul + DCW 0xdefe + bx lr + LEAF_END RhpLMul + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.S b/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.asm b/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.asm new file mode 100644 index 0000000000000..ab70efbd3d9d6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.asm @@ -0,0 +1,629 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "AsmMacros.h" + + TEXTAREA + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15)) + +#define HARDWARE_EXCEPTION 1 +#define SOFTWARE_EXCEPTION 0 + +;; ----------------------------------------------------------------------------- +;; Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx) + MACRO + ALLOC_THROW_FRAME $exceptionType + + PROLOG_NOP mov x3, sp + + ;; Setup a PAL_LIMITED_CONTEXT on the stack { + IF $exceptionType == HARDWARE_EXCEPTION + PROLOG_NOP sub sp,sp,#0x50 + PROLOG_NOP stp x3, x1, [sp] ; x3 is the SP and x1 is the IP of the fault site + PROLOG_PUSH_MACHINE_FRAME + ELSE + PROLOG_STACK_ALLOC 0x50 + PROLOG_NOP stp x3, lr, [sp] ; x3 is the SP and lr is the IP of the fault site + ENDIF + PROLOG_NOP stp d8, d9, [sp, #0x10] + PROLOG_NOP stp d10, d11, [sp, #0x20] + PROLOG_NOP stp d12, d13, [sp, #0x30] + PROLOG_NOP stp d14, d15, [sp, #0x40] + PROLOG_SAVE_REG_PAIR fp, lr, #-0x70! + PROLOG_NOP stp xzr, xzr, [sp, #0x10] ; locations reserved for return value, not used for exception handling + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + ;; } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + MEND + +;; ----------------------------------------------------------------------------- +;; Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet) +;; $extraStackSize - extra stack space that the user of the macro can use to +;; store additional registers + MACRO + ALLOC_CALL_FUNCLET_FRAME $extraStackSize + + ; Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,lr, #-60! + ; is intentional. Above statement would also emit instruction to save + ; sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body + ; of method. However, this method needs to be able to change fp before calling funclet. + ; This is required to access locals in funclet. + PROLOG_SAVE_REG_PAIR_NO_FP fp,lr, #-0x60! + PROLOG_SAVE_REG_PAIR x19, x20, #0x10 + PROLOG_SAVE_REG_PAIR x21, x22, #0x20 + PROLOG_SAVE_REG_PAIR x23, x24, #0x30 + PROLOG_SAVE_REG_PAIR x25, x26, #0x40 + PROLOG_SAVE_REG_PAIR x27, x28, #0x50 + PROLOG_NOP mov fp, sp + + IF $extraStackSize != 0 + PROLOG_STACK_ALLOC $extraStackSize + ENDIF + MEND + +;; ----------------------------------------------------------------------------- +;; Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet) +;; $extraStackSize - extra stack space that the user of the macro can use to +;; store additional registers. +;; It needs to match the value passed to the corresponding +;; ALLOC_CALL_FUNCLET_FRAME. + MACRO + FREE_CALL_FUNCLET_FRAME $extraStackSize + + IF $extraStackSize != 0 + EPILOG_STACK_FREE $extraStackSize + ENDIF + + EPILOG_RESTORE_REG_PAIR x19, x20, #0x10 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x20 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x30 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x40 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x50 + EPILOG_RESTORE_REG_PAIR fp, lr, #0x60! 
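+
+;; STACKSIZEOF_ExInfo above is just SIZEOF__ExInfo rounded up to a 16-byte
+;; boundary, since SP must stay 16-byte aligned. The same computation in C++,
+;; using the 0x290 ExInfo size from arm64/AsmOffsetsCpu.h:
+;;
+;;     constexpr size_t AlignUp16(size_t s) { return (s + 15) & ~size_t(15); }
+;;     static_assert(AlignUp16(0x290) == 0x290, "ExInfo is already 16-byte aligned");
+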
+ MEND + +;; ----------------------------------------------------------------------------- +;; Macro used to restore preserved general purpose and FP registers from REGDISPLAY +;; $regdisplayReg - register pointing to the REGDISPLAY structure + MACRO + RESTORE_PRESERVED_REGISTERS $regdisplayReg + + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + ldr x19, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + ldr x20, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + ldr x21, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + ldr x22, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + ldr x23, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + ldr x24, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + ldr x25, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + ldr x26, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + ldr x27, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + ldr x28, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + ldr fp, [x12] + ;; + ;; load FP preserved regs + ;; + add x12, $regdisplayReg, #OFFSETOF__REGDISPLAY__D + ldp d8, d9, [x12, #0x00] + ldp d10, d11, [x12, #0x10] + ldp d12, d13, [x12, #0x20] + ldp d14, d15, [x12, #0x30] + MEND + +;; ----------------------------------------------------------------------------- +;; Macro used to save preserved general purpose and FP registers to REGDISPLAY +;; $regdisplayReg - register pointing to the REGDISPLAY structure + MACRO + SAVE_PRESERVED_REGISTERS $regdisplayReg + + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + str x19, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + str x20, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + str x21, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + str x22, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + str x23, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + str x24, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + str x25, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + str x26, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + str x27, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + str x28, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + str fp, [x12] + ;; + ;; store vfp preserved regs + ;; + add x12, $regdisplayReg, #OFFSETOF__REGDISPLAY__D + stp d8, d9, [x12, #0x00] + stp d10, d11, [x12, #0x10] + stp d12, d13, [x12, #0x20] + stp d14, d15, [x12, #0x30] + MEND + +;; ----------------------------------------------------------------------------- +;; Macro used to thrash preserved general purpose registers in REGDISPLAY +;; to make sure nobody uses them +;; $regdisplayReg - register pointing to the REGDISPLAY structure + MACRO + TRASH_PRESERVED_REGISTERS_STORAGE $regdisplayReg + +#if 0 // def _DEBUG ;; @TODO: temporarily removed because trashing the frame pointer breaks the debugger + movz x3, #0xbaad, LSL #48 + movk x3, #0xdeed, LSL #32 + movk x3, #0xbaad, LSL #16 + movk x3, #0xdeed + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + str x3, [x12] + ldr x12, [$regdisplayReg, 
#OFFSETOF__REGDISPLAY__pX24] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + str x3, [x12] + ldr x12, [$regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + str x3, [x12] +#endif // _DEBUG + MEND + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowHwEx +;; +;; INPUT: W0: exception code of fault +;; X1: faulting IP +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpThrowHwEx + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + + ALLOC_THROW_FRAME HARDWARE_EXCEPTION + + ;; x2 = GetThread(), TRASHES x1 + INLINE_GETTHREAD x2, x1 + + add x1, sp, #rsp_offsetof_ExInfo ;; x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + mov w3, #2 + strb w3, [x1, #OFFSETOF__ExInfo__m_kind] ;; pExInfo->m_kind = ExKind.HardwareFault + + ;; link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context ;; x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; w0: exception code + ;; x1: ExInfo* + bl RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + ;; no return + EMIT_BREAKPOINT + + NESTED_END RhpThrowHwEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowEx +;; +;; INPUT: X0: exception object +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpThrowEx + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + ;; x2 = GetThread(), TRASHES x1 + INLINE_GETTHREAD x2, x1 + + ;; There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + ;; address could have been hijacked when we were in that C# code and we must remove the hijack and + ;; reflect the correct return address in our exception context record. The other throw helpers don't + ;; need this because they cannot be tail-called from C#. + + ;; NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + ;; where the tail-calling thread had saved LR, which may not match where we have saved LR. 
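+
+;; The ExInfo set-up performed by RhpThrowHwEx above amounts to the following
+;; hedged C++ sketch. It assumes simplified Thread/ExInfo/PAL_LIMITED_CONTEXT
+;; declarations exposing the fields named by the OFFSETOF__* constants; the
+;; literals mirror the immediates used in the assembly.
+;;
+;;     void InitAndLinkExInfoSketch(Thread* pThread, ExInfo* pExInfo, PAL_LIMITED_CONTEXT* pContext)
+;;     {
+;;         pExInfo->m_exception    = nullptr;
+;;         pExInfo->m_passNumber   = 1;
+;;         pExInfo->m_idxCurClause = 0xFFFFFFFF;   // MaxTryRegionIdx
+;;         pExInfo->m_kind         = 2;            // ExKind.HardwareFault
+;;         pExInfo->m_pPrevExInfo  = pThread->m_pExInfoStackHead;
+;;         pThread->m_pExInfoStackHead = pExInfo;  // push onto the thread's chain
+;;         pExInfo->m_pExContext   = pContext;
+;;     }
+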
+ + ldr x1, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz x1, NotHijacked + + ldr x3, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + + ;; x0: exception object + ;; x1: hijacked return address + ;; x2: pThread + ;; x3: hijacked return address location + + add x12, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) ;; re-compute SP at callsite + cmp x3, x12 ;; if (m_ppvHijackedReturnAddressLocation < SP at callsite) + blo TailCallWasHijacked + + ;; normal case where a valid return address location is hijacked + str x1, [x3] + b ClearThreadState + +TailCallWasHijacked + + ;; Abnormal case where the return address location is now invalid because we ended up here via a tail + ;; call. In this case, our hijacked return address should be the correct caller of this method. + ;; + + ;; stick the previous return address in LR as well as in the right spots in our PAL_LIMITED_CONTEXT. + mov lr, x1 + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__LR)] + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)] + +ClearThreadState + + ;; clear the Thread's hijack state + str xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str xzr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +NotHijacked + + add x1, sp, #rsp_offsetof_ExInfo ;; x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_kind] ;; pExInfo->m_kind = ExKind.Throw + + ;; link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context ;; x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; x0: exception object + ;; x1: ExInfo* + bl RhThrowEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + ;; no return + EMIT_BREAKPOINT + NESTED_END RhpThrowEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpRethrow() +;; +;; SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +;; +;; INPUT: +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpRethrow + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + ;; x2 = GetThread(), TRASHES x1 + INLINE_GETTHREAD x2, x1 + + add x1, sp, #rsp_offsetof_ExInfo ;; x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] ;; pExInfo->m_exception = null + strb wzr, [x1, #OFFSETOF__ExInfo__m_kind] ;; init to a deterministic value (ExKind.None) + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] ;; pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] ;; pExInfo->m_idxCurClause = MaxTryRegionIdx + + ;; link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + mov x0, x3 ;; x0 <- current ExInfo + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; 
pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context ;; x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] ;; pExInfo->m_pExContext = pContext + + ;; x0 contains the currently active ExInfo + ;; x1 contains the address of the new ExInfo + bl RhRethrow + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + ;; no return + EMIT_BREAKPOINT + NESTED_END RhpRethrow + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +;; ExInfo* pExInfo) +;; +;; INPUT: X0: exception object +;; X1: handler funclet address +;; X2: REGDISPLAY* +;; X3: ExInfo* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallCatchFunclet + + ALLOC_CALL_FUNCLET_FRAME 0x60 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + stp x0, x2, [sp, #0x40] ;; x0, x2 & x3 are saved so we have the exception object, REGDISPLAY and + stp x3, xzr, [sp, #0x50] ;; ExInfo later, xzr makes space for the local "is_not_handling_thread_abort" + +#define rsp_offset_is_not_handling_thread_abort 0x58 +#define rsp_offset_x2 0x48 +#define rsp_offset_x3 0x50 + + ;; + ;; clear the DoNotTriggerGc flag, trashes x4-x6 + ;; + INLINE_GETTHREAD x5, x6 ;; x5 <- Thread*, x6 <- trashed + + ldr x4, [x5, #OFFSETOF__Thread__m_threadAbortException] + sub x4, x4, x0 + str x4, [sp, #rsp_offset_is_not_handling_thread_abort] ;; Non-zero if the exception is not ThreadAbortException + + add x12, x5, #OFFSETOF__Thread__m_ThreadStateFlags + +ClearRetry_Catch + ldxr w4, [x12] + bic w4, w4, #TSF_DoNotTriggerGc + stxr w6, w4, [x12] + cbz w6, ClearSuccess_Catch + b ClearRetry_Catch +ClearSuccess_Catch + + ;; + ;; set preserved regs to the values expected by the funclet + ;; + RESTORE_PRESERVED_REGISTERS x2 + ;; + ;; trash the values at the old homes to make sure nobody uses them + ;; + TRASH_PRESERVED_REGISTERS_STORAGE x2 + + ;; + ;; call the funclet + ;; + ;; x0 still contains the exception object + blr x1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + ;; x0 contains resume IP + + ldr x2, [sp, #rsp_offset_x2] ;; x2 <- REGDISPLAY* + +;; @TODO: add debug-only validation code for ExInfo pop + + INLINE_GETTHREAD x1, x3 ;; x1 <- Thread*, x3 <- trashed + + ;; We must unhijack the thread at this point because the section of stack where the hijack is applied + ;; may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
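+
+;; As an aside, the ClearRetry_Catch loop above is a plain LL/SC bit-clear. A C++
+;; sketch using std::atomic (memory_order_relaxed is chosen because ldxr/stxr,
+;; unlike ldaxr/stlxr, carry no acquire/release semantics; TSF_DoNotTriggerGc is
+;; the runtime's flag value):
+;;
+;;     #include <atomic>
+;;     #include <cstdint>
+;;     void ClearDoNotTriggerGcSketch(std::atomic<uint32_t>& threadStateFlags, uint32_t TSF_DoNotTriggerGc)
+;;     {
+;;         threadStateFlags.fetch_and(~TSF_DoNotTriggerGc, std::memory_order_relaxed);
+;;     }
+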
+ INLINE_THREAD_UNHIJACK x1, x3, x12 ;; Thread in x1, trashes x3 and x12 + + ldr x3, [sp, #rsp_offset_x3] ;; x3 <- current ExInfo* + ldr x2, [x2, #OFFSETOF__REGDISPLAY__SP] ;; x2 <- resume SP value + +PopExInfoLoop + ldr x3, [x3, #OFFSETOF__ExInfo__m_pPrevExInfo] ;; x3 <- next ExInfo + cbz x3, DonePopping ;; if (pExInfo == null) { we're done } + cmp x3, x2 + blt PopExInfoLoop ;; if (pExInfo < resume SP} { keep going } + +DonePopping + str x3, [x1, #OFFSETOF__Thread__m_pExInfoStackHead] ;; store the new head on the Thread + + ldr x3, =RhpTrapThreads + ldr w3, [x3] + tbz x3, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort + + ldr x3, [sp, #rsp_offset_is_not_handling_thread_abort] + cbnz x3, NoAbort + + ;; It was the ThreadAbortException, so rethrow it + ;; reset SP + mov x1, x0 ;; x1 <- continuation address as exception PC + mov w0, #STATUS_REDHAWK_THREAD_ABORT + mov sp, x2 + b RhpThrowHwEx + +NoAbort + ;; reset SP and jump to continuation address + mov sp, x2 + br x0 + + NESTED_END RhpCallCatchFunclet + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: X0: handler funclet address +;; X1: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallFinallyFunclet + + ALLOC_CALL_FUNCLET_FRAME 0x50 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + stp x0, x1, [sp, #0x40] ;; x1 is saved so we have the REGDISPLAY later, x0 is just alignment padding + +#define rsp_offset_x1 0x48 + + ;; + ;; We want to suppress hijacking between invocations of subsequent finallys. We do this because we + ;; cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the + ;; method) and then been popped off the stack, leaving behind no trace of its effect. + ;; + ;; So we clear the state before and set it after invocation of the handler. 
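+
+;; The PopExInfoLoop in RhpCallCatchFunclet above walks the thread's ExInfo chain
+;; past every entry that lives below the resume SP; ExInfos are stack-allocated,
+;; so those frames are about to become dead. A hedged C++ sketch, assuming
+;; simplified Thread/ExInfo declarations with the fields named above:
+;;
+;;     void PopExInfosBelowSketch(Thread* pThread, ExInfo* pCurExInfo, uintptr_t resumeSP)
+;;     {
+;;         ExInfo* pExInfo = pCurExInfo;
+;;         do
+;;         {
+;;             pExInfo = pExInfo->m_pPrevExInfo;
+;;         } while (pExInfo != nullptr && reinterpret_cast<uintptr_t>(pExInfo) < resumeSP);
+;;         pThread->m_pExInfoStackHead = pExInfo;   // new head; may be null
+;;     }
+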
+ ;; + + ;; + ;; clear the DoNotTriggerGc flag, trashes x2-x4 + ;; + INLINE_GETTHREAD x2, x3 ;; x2 <- Thread*, x3 <- trashed + + add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags + +ClearRetry + ldxr w4, [x12] + bic w4, w4, #TSF_DoNotTriggerGc + stxr w3, w4, [x12] + cbz w3, ClearSuccess + b ClearRetry +ClearSuccess + + ;; + ;; set preserved regs to the values expected by the funclet + ;; + RESTORE_PRESERVED_REGISTERS x1 + ;; + ;; trash the values at the old homes to make sure nobody uses them + ;; + TRASH_PRESERVED_REGISTERS_STORAGE x1 + + ;; + ;; call the funclet + ;; + blr x0 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + ldr x1, [sp, #rsp_offset_x1] ;; reload REGDISPLAY pointer + + ;; + ;; save new values of preserved regs into REGDISPLAY + ;; + SAVE_PRESERVED_REGISTERS x1 + + ;; + ;; set the DoNotTriggerGc flag, trashes x1-x3 + ;; + INLINE_GETTHREAD x2, x3 ;; x2 <- Thread*, x3 <- trashed + + add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags +SetRetry + ldxr w1, [x12] + orr w1, w1, #TSF_DoNotTriggerGc + stxr w3, w1, [x12] + cbz w3, SetSuccess + b SetRetry +SetSuccess + + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + FREE_CALL_FUNCLET_FRAME 0x50 + EPILOG_RETURN + + NESTED_END RhpCallFinallyFunclet + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: X0: exception object +;; X1: filter funclet address +;; X2: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpCallFilterFunclet + ALLOC_CALL_FUNCLET_FRAME 0x40 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + + ldr x12, [x2, #OFFSETOF__REGDISPLAY__pFP] + ldr fp, [x12] + + ;; + ;; call the funclet + ;; + ;; x0 still contains the exception object + blr x1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + FREE_CALL_FUNCLET_FRAME 0x40 + EPILOG_RETURN + + NESTED_END RhpCallFilterFunclet + + INLINE_GETTHREAD_CONSTANT_POOL + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/src/nativeaot/Runtime/arm64/GcProbe.asm new file mode 100644 index 0000000000000..7dfa318291ff6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/GcProbe.asm @@ -0,0 +1,752 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + EXTERN g_fGcStressStarted + +PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH + + ;; Build a map of symbols representing offsets into the transition frame (see PInvokeTransitionFrame in + ;; rhbinder.h) and keep these two in sync. + map 0 + field OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs + field 10 * 8 ; x19..x28 +m_CallersSP field 8 ; SP at routine entry + field 19 * 8 ; x0..x18 + field 8 ; lr +m_SavedNZCV field 8 ; Saved condition flags + field 4 * 8 ; d0..d3 +PROBE_FRAME_SIZE field 0 + + ;; Support for setting up a transition frame when performing a GC probe. In many respects this is very + ;; similar to the logic in PUSH_COOP_PINVOKE_FRAME in AsmMacros.h. 
In most cases setting up the + ;; transition frame comprises the entirety of the caller's prolog (and initial non-prolog code) and + ;; similarly for the epilog. Those cases can be dealt with using PROLOG_PROBE_FRAME and EPILOG_PROBE_FRAME + ;; defined below. For the special cases where additional work has to be done in the prolog we also provide + ;; the lower level macros ALLOC_PROBE_FRAME, FREE_PROBE_FRAME and INIT_PROBE_FRAME that allow more control + ;; to be asserted. + ;; + ;; Note that we currently employ a significant simplification of frame setup: we always allocate a + ;; maximally-sized PInvokeTransitionFrame and save all of the registers. Depending on the caller this can + ;; lead to up to 20 additional register saves (x0-x18, lr) or 160 bytes of stack space. I have done no + ;; analysis to see whether any of the worst cases occur on performance sensitive paths and whether the + ;; additional saves will show any measurable degradation. + + ;; Perform the parts of setting up a probe frame that can occur during the prolog (and indeed this macro + ;; can only be called from within the prolog). + MACRO + ALLOC_PROBE_FRAME $extraStackSpace, $saveFPRegisters + + ;; First create PInvokeTransitionFrame + PROLOG_SAVE_REG_PAIR fp, lr, #-(PROBE_FRAME_SIZE + $extraStackSpace)! ;; Push down stack pointer and store FP and LR + + ;; Slot at [sp, #0x10] is reserved for Thread * + ;; Slot at [sp, #0x18] is reserved for bitmask of saved registers + + ;; Save callee saved registers + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + + ;; Slot at [sp, #0x70] is reserved for caller sp + + ;; Save the scratch registers + PROLOG_NOP str x0, [sp, #0x78] + PROLOG_NOP stp x1, x2, [sp, #0x80] + PROLOG_NOP stp x3, x4, [sp, #0x90] + PROLOG_NOP stp x5, x6, [sp, #0xA0] + PROLOG_NOP stp x7, x8, [sp, #0xB0] + PROLOG_NOP stp x9, x10, [sp, #0xC0] + PROLOG_NOP stp x11, x12, [sp, #0xD0] + PROLOG_NOP stp x13, x14, [sp, #0xE0] + PROLOG_NOP stp x15, x16, [sp, #0xF0] + PROLOG_NOP stp x17, x18, [sp, #0x100] + PROLOG_NOP str lr, [sp, #0x110] + + ;; Slot at [sp, #0x118] is reserved for NZCV + + ;; Save the floating return registers + IF $saveFPRegisters + PROLOG_NOP stp d0, d1, [sp, #0x120] + PROLOG_NOP stp d2, d3, [sp, #0x130] + ENDIF + + MEND + + ;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all + ;; registers are restored (apart for sp and pc), even volatiles. 
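+
+;; For readers following the literals in ALLOC_PROBE_FRAME / FREE_PROBE_FRAME, the
+;; offsets produced by the map/field block above work out as below (a reader's aid
+;; only; the assembler derives these, the constants are not used anywhere):
+;;
+;;     #include <cstddef>
+;;     constexpr size_t kPreservedRegs  = 0x020;   // x19..x28 (10 * 8)
+;;     constexpr size_t kCallersSP      = 0x070;
+;;     constexpr size_t kScratchRegs    = 0x078;   // x0..x18 (19 * 8)
+;;     constexpr size_t kSavedLR        = 0x110;
+;;     constexpr size_t kSavedNZCV      = 0x118;
+;;     constexpr size_t kSavedD0toD3    = 0x120;   // d0..d3 (4 * 8)
+;;     constexpr size_t kProbeFrameSize = 0x140;
+;;     static_assert(kSavedD0toD3 + 4 * 8 == kProbeFrameSize, "PROBE_FRAME_SIZE");
+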
+ MACRO + FREE_PROBE_FRAME $extraStackSpace, $restoreFPRegisters + + ;; Restore the scratch registers + PROLOG_NOP ldr x0, [sp, #0x78] + PROLOG_NOP ldp x1, x2, [sp, #0x80] + PROLOG_NOP ldp x3, x4, [sp, #0x90] + PROLOG_NOP ldp x5, x6, [sp, #0xA0] + PROLOG_NOP ldp x7, x8, [sp, #0xB0] + PROLOG_NOP ldp x9, x10, [sp, #0xC0] + PROLOG_NOP ldp x11, x12, [sp, #0xD0] + PROLOG_NOP ldp x13, x14, [sp, #0xE0] + PROLOG_NOP ldp x15, x16, [sp, #0xF0] + PROLOG_NOP ldp x17, x18, [sp, #0x100] + PROLOG_NOP ldr lr, [sp, #0x110] + + ; Restore the floating return registers + IF $restoreFPRegisters + EPILOG_NOP ldp d0, d1, [sp, #0x120] + EPILOG_NOP ldp d2, d3, [sp, #0x130] + ENDIF + + ;; Restore callee saved registers + EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 + + EPILOG_RESTORE_REG_PAIR fp, lr, #(PROBE_FRAME_SIZE + $extraStackSpace)! + MEND + + ;; Complete the setup of a probe frame allocated with ALLOC_PROBE_FRAME with the initialization that can + ;; occur only outside the prolog (includes linking the frame to the current Thread). This macro assumes SP + ;; is invariant outside of the prolog. + ;; + ;; $threadReg : register containing the Thread* (this will be preserved) + ;; $trashReg : register that can be trashed by this macro + ;; $savedRegsMask : value to initialize m_Flags field with (register or #constant) + ;; $gcFlags : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant + ;; $frameSize : total size of the method's stack frame (including probe frame size) + MACRO + INIT_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags, $frameSize + + LCLS BitmaskStr +BitmaskStr SETS "$savedRegsMask" + + str $threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread] ; Thread * + IF BitmaskStr:LEFT:1 == "#" + ;; The savedRegsMask is a constant, remove the leading "#" since the MOVL64 doesn't expect it +BitmaskStr SETS BitmaskStr:RIGHT:(:LEN:BitmaskStr - 1) + MOVL64 $trashReg, $BitmaskStr, $gcFlags + ELSE + ASSERT "$gcFlags" == "" + ;; The savedRegsMask is a register + mov $trashReg, $savedRegsMask + ENDIF + str $trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + add $trashReg, sp, #$frameSize + str $trashReg, [sp, #m_CallersSP] + MEND + + ;; Simple macro to use when setting up the probe frame can comprise the entire prolog. Call this macro + ;; first in the method (no further prolog instructions can be added after this). + ;; + ;; $threadReg : register containing the Thread* (this will be preserved). If defaulted (specify |) then + ;; the current thread will be calculated inline into r2 ($trashReg must not equal r2 in + ;; this case) + ;; $trashReg : register that can be trashed by this macro + ;; $savedRegsMask : value to initialize m_dwFlags field with (register or #constant) + ;; $gcFlags : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant + MACRO + PROLOG_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags + + ; Local string tracking the name of the register in which the Thread* is kept. Defaults to the value + ; of $threadReg. + LCLS __PPF_ThreadReg +__PPF_ThreadReg SETS "$threadReg" + + ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving + ; incoming register values into it. 
+ ALLOC_PROBE_FRAME 0, {true} + + ; If the caller didn't provide a value for $threadReg then generate code to fetch the Thread* into x2. + ; Record that x2 holds the Thread* in our local variable. + IF "$threadReg" == "" + ASSERT "$trashReg" != "x2" +__PPF_ThreadReg SETS "x2" + INLINE_GETTHREAD $__PPF_ThreadReg, $trashReg + ENDIF + + ; Perform the rest of the PInvokeTransitionFrame initialization. + INIT_PROBE_FRAME $__PPF_ThreadReg, $trashReg, $savedRegsMask, $gcFlags, PROBE_FRAME_SIZE + mov $trashReg, sp + str $trashReg, [$__PPF_ThreadReg, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + MEND + + ; Simple macro to use when PROLOG_PROBE_FRAME was used to set up and initialize the prolog and + ; PInvokeTransitionFrame. This will define the epilog including a return via the restored LR. + MACRO + EPILOG_PROBE_FRAME + + FREE_PROBE_FRAME 0, {true} + EPILOG_RETURN + MEND + +;; In order to avoid trashing VFP registers across the loop hijack we must save all user registers, so that +;; registers used by the loop being hijacked will not be affected. Unlike ARM32 where neon registers (NQ0, ..., NQ15) +;; are fully covered by the floating point registers D0 ... D31, we have 32 neon registers Q0, ... Q31 on ARM64 +;; which are not fully covered by the register D0 ... D31. Therefore we must explicitly save all Q registers. +EXTRA_SAVE_SIZE equ (32*16) + + MACRO + ALLOC_LOOP_HIJACK_FRAME + + PROLOG_STACK_ALLOC EXTRA_SAVE_SIZE + + ;; Save all neon registers + PROLOG_NOP stp q0, q1, [sp] + PROLOG_NOP stp q2, q3, [sp, #0x20] + PROLOG_NOP stp q4, q5, [sp, #0x40] + PROLOG_NOP stp q6, q7, [sp, #0x60] + PROLOG_NOP stp q8, q9, [sp, #0x80] + PROLOG_NOP stp q10, q11, [sp, #0xA0] + PROLOG_NOP stp q12, q13, [sp, #0xC0] + PROLOG_NOP stp q14, q15, [sp, #0xE0] + PROLOG_NOP stp q16, q17, [sp, #0x100] + PROLOG_NOP stp q18, q19, [sp, #0x120] + PROLOG_NOP stp q20, q21, [sp, #0x140] + PROLOG_NOP stp q22, q23, [sp, #0x160] + PROLOG_NOP stp q24, q25, [sp, #0x180] + PROLOG_NOP stp q26, q27, [sp, #0x1A0] + PROLOG_NOP stp q28, q29, [sp, #0x1C0] + PROLOG_NOP stp q30, q31, [sp, #0x1E0] + + ALLOC_PROBE_FRAME 0, {false} + MEND + + MACRO + FREE_LOOP_HIJACK_FRAME + + FREE_PROBE_FRAME 0, {false} + + ;; restore all neon registers + PROLOG_NOP ldp q0, q1, [sp] + PROLOG_NOP ldp q2, q3, [sp, #0x20] + PROLOG_NOP ldp q4, q5, [sp, #0x40] + PROLOG_NOP ldp q6, q7, [sp, #0x60] + PROLOG_NOP ldp q8, q9, [sp, #0x80] + PROLOG_NOP ldp q10, q11, [sp, #0xA0] + PROLOG_NOP ldp q12, q13, [sp, #0xC0] + PROLOG_NOP ldp q14, q15, [sp, #0xE0] + PROLOG_NOP ldp q16, q17, [sp, #0x100] + PROLOG_NOP ldp q18, q19, [sp, #0x120] + PROLOG_NOP ldp q20, q21, [sp, #0x140] + PROLOG_NOP ldp q22, q23, [sp, #0x160] + PROLOG_NOP ldp q24, q25, [sp, #0x180] + PROLOG_NOP ldp q26, q27, [sp, #0x1A0] + PROLOG_NOP ldp q28, q29, [sp, #0x1C0] + PROLOG_NOP ldp q30, q31, [sp, #0x1E0] + + EPILOG_STACK_FREE EXTRA_SAVE_SIZE + MEND + +;; +;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this +;; thread if it finds it at an IP that isn't managed code. 
+;; +;; Register state on entry: +;; x2: thread pointer +;; +;; Register state on exit: +;; + MACRO + ClearHijackState + + ASSERT OFFSETOF__Thread__m_pvHijackedReturnAddress == (OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8) + ;; Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress + stp xzr, xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + ;; Clear m_uHijackedReturnValueFlags + str xzr, [x2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags] + MEND + +;; +;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and +;; clears the hijack state. +;; +;; Register state on entry: +;; All registers correct for return to the original return address. +;; +;; Register state on exit: +;; x2: thread pointer +;; x3: trashed +;; x12: transition frame flags for the return registers x0 and x1 +;; + MACRO + FixupHijackedCallstack + + ;; x2 <- GetThread(), TRASHES x3 + INLINE_GETTHREAD x2, x3 + + ;; + ;; Fix the stack by restoring the original return address + ;; + ASSERT OFFSETOF__Thread__m_uHijackedReturnValueFlags == (OFFSETOF__Thread__m_pvHijackedReturnAddress + 8) + ;; Load m_pvHijackedReturnAddress and m_uHijackedReturnValueFlags + ldp lr, x12, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + + ClearHijackState + MEND + +;; +;; Set the Thread state and wait for a GC to complete. +;; +;; Register state on entry: +;; x4: thread pointer +;; +;; Register state on exit: +;; x4: thread pointer +;; All other registers trashed +;; + + EXTERN RhpWaitForGCNoAbort + + MACRO + WaitForGCCompletion + + ldr w2, [x4, #OFFSETOF__Thread__m_ThreadStateFlags] + tst w2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC + bne %ft0 + + ldr x9, [x4, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + bl RhpWaitForGCNoAbort +0 + MEND + + MACRO + HijackTargetFakeProlog + + ;; This is a fake entrypoint for the method that 'tricks' the OS into calling our personality routine. + ;; The code here should never be executed, and the unwind info is bogus, but we don't mind since the + ;; stack is broken by the hijack anyway until after we fix it below. + PROLOG_SAVE_REG_PAIR fp, lr, #-0x10! + nop ; We also need a nop here to simulate the implied bl instruction. Without + ; this, an OS-applied -4 will back up into the method prolog and the unwind + ; will not be applied as desired. + + MEND + +;; +;; +;; +;; GC Probe Hijack targets +;; +;; + EXTERN RhpPInvokeExceptionGuard + + NESTED_ENTRY RhpGcProbeHijackWrapper, .text, RhpPInvokeExceptionGuard + HijackTargetFakeProlog + + LABELED_RETURN_ADDRESS RhpGcProbeHijack + + FixupHijackedCallstack + orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS + b RhpGcProbe + NESTED_END RhpGcProbeHijackWrapper + +#ifdef FEATURE_GC_STRESS +;; +;; +;; GC Stress Hijack targets +;; +;; + LEAF_ENTRY RhpGcStressHijack + FixupHijackedCallstack + orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS + b RhpGcStressProbe + LEAF_END RhpGcStressHijack +;; +;; Worker for our GC stress probes. Do not call directly!! +;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. +;; This worker performs the GC Stress work and returns to the original return address. +;; +;; Register state on entry: +;; x0: hijacked function return value +;; x1: hijacked function return value +;; x2: thread pointer +;; w12: register bitmask +;; +;; Register state on exit: +;; Scratch registers, except for x0, have been trashed +;; All other registers restored as they were when the hijack was first reached. 
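+
+;; As an aside, FixupHijackedCallstack and ClearHijackState above boil down to the
+;; following hedged C++ sketch (field names follow the OFFSETOF__Thread__*
+;; constants; the adjacency ASSERTed above is what lets the assembly use ldp/stp):
+;;
+;;     void UnhijackForProbeSketch(Thread* pThread, void*& returnAddress, uintptr_t& returnValueFlags)
+;;     {
+;;         returnAddress    = pThread->m_pvHijackedReturnAddress;     // restored into lr
+;;         returnValueFlags = pThread->m_uHijackedReturnValueFlags;   // handed back in x12
+;;         pThread->m_ppvHijackedReturnAddressLocation = nullptr;
+;;         pThread->m_pvHijackedReturnAddress          = nullptr;
+;;         pThread->m_uHijackedReturnValueFlags        = 0;
+;;     }
+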
+;; + NESTED_ENTRY RhpGcStressProbe + PROLOG_PROBE_FRAME x2, x3, x12, + + bl $REDHAWKGCINTERFACE__STRESSGC + + EPILOG_PROBE_FRAME + NESTED_END RhpGcStressProbe +#endif ;; FEATURE_GC_STRESS + + LEAF_ENTRY RhpGcProbe + ldr x3, =RhpTrapThreads + ldr w3, [x3] + tbnz x3, #TrapThreadsFlags_TrapThreads_Bit, RhpGcProbeRare + ret + LEAF_END RhpGcProbe + + EXTERN RhpThrowHwEx + + NESTED_ENTRY RhpGcProbeRare + PROLOG_PROBE_FRAME x2, x3, x12, + + mov x4, x2 + WaitForGCCompletion + + ldr x2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tbnz x2, #PTFF_THREAD_ABORT_BIT, %F1 + + EPILOG_PROBE_FRAME + +1 + FREE_PROBE_FRAME 0, {true} + EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT + EPILOG_NOP mov x1, lr ;; return address as exception PC + EPILOG_NOP b RhpThrowHwEx + NESTED_END RhpGcProbeRare + + LEAF_ENTRY RhpGcPoll + brk 0xf000 ;; TODO: remove after debugging/testing stub + ; @todo: I'm assuming it's not OK to trash any register here. If that's not true we can optimize the + ; push/pops out of this fast path. + str x0, [sp], #-0x10! + ldr x0, =RhpTrapThreads + ldr w0, [x0] + tbnz x0, #TrapThreadsFlags_TrapThreads_Bit, %F0 + ldr x0, [sp], #0x10! + ret +0 + ldr x0, [sp], #0x10! + b RhpGcPollRare + LEAF_END RhpGcPoll + + NESTED_ENTRY RhpGcPollRare + brk 0xf000 ;; TODO: remove after debugging/testing stub + PROLOG_PROBE_FRAME |, x3, #PROBE_SAVE_FLAGS_EVERYTHING, 0 + + ; Unhijack this thread, if necessary. + INLINE_THREAD_UNHIJACK x2, x0, x1 ;; trashes x0, x1 + + mov x4, x2 + WaitForGCCompletion + + EPILOG_PROBE_FRAME + NESTED_END RhpGcPollRare + + LEAF_ENTRY RhpGcPollStress + ; + ; loop hijacking is used instead + ; + brk 0xf000 + + LEAF_END RhpGcPollStress + + +#ifdef FEATURE_GC_STRESS + NESTED_ENTRY RhpHijackForGcStress + ;; This function should be called from right before epilog + + ;; Push FP and LR, and allocate stack to hold PAL_LIMITED_CONTEXT structure and VFP return value registers + PROLOG_SAVE_REG_PAIR fp, lr, #-(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)! + + ;; + ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the + ;; IP after the call to this helper. + ;; + ;; This is very likely overkill since the calculation of the return address should only need SP and + ;; LR, but this is test code, so I'm not too worried about efficiency. + ;; + ;; Setup a PAL_LIMITED_CONTEXT on the stack + ;; { + ;; FP and LR already pushed. + PROLOG_NOP stp x0, x1, [sp, #0x10] + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + PROLOG_SAVE_REG lr, #0x78 + + ;; } end PAL_LIMITED_CONTEXT + + ;; Save VFP return value + stp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)] + stp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)] + + ;; Compute and save SP at callsite. 
+ add x0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20) ;; +0x20 for the pushes right before the context struct + str x0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP] + + mov x0, sp ; Address of PAL_LIMITED_CONTEXT + bl $THREAD__HIJACKFORGCSTRESS + + ;; Restore return value registers (saved in PAL_LIMITED_CONTEXT structure) + ldp x0, x1, [sp, #0x10] + + ;; Restore VFP return value + ldp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)] + ldp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)] + + ;; Epilog + EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 + EPILOG_RESTORE_REG_PAIR fp, lr, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)! + EPILOG_RETURN + + NESTED_END RhpHijackForGcStress + + NESTED_ENTRY RhpHijackForGcStressLeaf + ;; This should be jumped to, right before epilog + ;; x9 has the return address (we don't care about trashing scratch regs at this point) + + ;; Push FP and LR, and allocate stack to hold PAL_LIMITED_CONTEXT structure and VFP return value registers + PROLOG_SAVE_REG_PAIR fp, lr, #-(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)! + + ;; + ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the + ;; IP after the call to this helper. + ;; + ;; This is very likely overkill since the calculation of the return address should only need SP and + ;; LR, but this is test code, so I'm not too worried about efficiency. + ;; + ;; Setup a PAL_LIMITED_CONTEXT on the stack + ;; { + ;; FP and LR already pushed. + PROLOG_NOP stp x0, x1, [sp, #0x10] + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + ; PROLOG_SAVE_REG macro doesn't let to use scratch reg: + PROLOG_NOP str x9, [sp, #0x78] ; this is return address from RhpHijackForGcStress; lr is return address for it's caller + + ;; } end PAL_LIMITED_CONTEXT + + ;; Save VFP return value + stp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)] + stp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)] + + ;; Compute and save SP at callsite. + add x0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20) ;; +0x20 for the pushes right before the context struct + str x0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP] + + mov x0, sp ; Address of PAL_LIMITED_CONTEXT + bl $THREAD__HIJACKFORGCSTRESS + + ;; Restore return value registers (saved in PAL_LIMITED_CONTEXT structure) + ldp x0, x1, [sp, #0x10] + + ;; Restore VFP return value + ldp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)] + ldp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)] + + ;; Epilog + EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 + EPILOG_NOP ldr x9, [sp, #0x78] + EPILOG_RESTORE_REG_PAIR fp, lr, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)! + EPILOG_NOP br x9 + + NESTED_END RhpHijackForGcStressLeaf + +#endif ;; FEATURE_GC_STRESS + +#if 0 // used by the binder only +;; +;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH +;; dispatch. These are needed to properly coordinate with the GC hijacking logic. 
We are essentially replacing +;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of +;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the +;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be +;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the +;; handler in the caller. +;; +;; If we are hijacked, then we jump to a routine that will unhijack appropriately and wait for the GC to +;; complete. There are also variants for GC stress. +;; +;; Note that at this point we are either hijacked or we are not, and this will not change until we return to +;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack +;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. +;; + MACRO + RTU_EH_JUMP_HELPER $funcName, $hijackFuncName, $isStress, $stressFuncName + + LEAF_ENTRY $funcName + ldr x0, =$hijackFuncName + cmp x0, lr + beq RhpGCProbeForEHJump + + IF $isStress + ldr x0, =$stressFuncName + cmp x0, lr + beq RhpGCStressProbeForEHJump + ENDIF + + ;; We are not hijacked, so we can return to the handler. + ;; We return to keep the call/return prediction balanced. + mov lr, x2 ; Update the return address + ret + LEAF_END $funcName + MEND +;; We need an instance of the helper for each possible hijack function. The binder has enough +;; information to determine which one we need to use for any function. + RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijack, {false}, 0 + RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijack, {false}, 0 + RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijack, {false}, 0 +#ifdef FEATURE_GC_STRESS + RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijack, {true}, RhpGcStressHijack + RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijack, {true}, RhpGcStressHijack + RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijack, {true}, RhpGcStressHijack +#endif + +;; +;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs. +;; +;; Register state on entry: +;; x0: scratch +;; x1: reference to the exception object. +;; x2: handler address we want to jump to. +;; Non-volatile registers are all already correct for return to the caller. +;; The stack is as if we are just about to returned from the call +;; +;; Register state on exit: +;; x0: reference to the exception object +;; x2: thread pointer +;; + MACRO + EHJumpProbeProlog + + PROLOG_NOP mov x0, x1 ; move the ex object reference into x0 so we can report it + ALLOC_PROBE_FRAME 0x10, {true} + str x2, [sp, #PROBE_FRAME_SIZE] + + ;; x2 <- GetThread(), TRASHES x1 + INLINE_GETTHREAD x2, x1 + + ;; Recover the original return address and update the frame + ldr lr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + str lr, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP] + + ;; ClearHijackState expects thread in x2 + ClearHijackState + + ; TRASHES x1 + INIT_PROBE_FRAME x2, x1, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_X0), PTFF_X0_IS_GCREF_HI, (PROBE_FRAME_SIZE + 8) + add x1, sp, xzr + str x1, [x2, #OFFSETOF__Thread__m_pHackPInvokeTunnel] + MEND + +;; +;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the +;; final jump to the handler for EH jump probe funcs. 
+;; +;; Register state on entry: +;; x0: reference to the exception object +;; x1-x3: scratch +;; +;; Register state on exit: +;; sp: correct for return to the caller +;; x1: reference to the exception object +;; + MACRO + EHJumpProbeEpilog + + ldr x2, [sp, #PROBE_FRAME_SIZE] + FREE_PROBE_FRAME 0x10, {true} ; This restores exception object back into x0 + EPILOG_NOP mov x1, x0 ; Move the Exception object back into x1 where the catch handler expects it + EPILOG_NOP br x2 + MEND + +;; +;; We are hijacked for a normal GC (not GC stress), so we need to unhijack and wait for the GC to complete. +;; +;; Register state on entry: +;; x0: reference to the exception object. +;; x2: thread +;; Non-volatile registers are all already correct for return to the caller. +;; The stack is as if we have tail called to this function (lr points to return address). +;; +;; Register state on exit: +;; x0: reference to the exception object +;; + NESTED_ENTRY RhpGCProbeForEHJump + brk 0xf000 ;; TODO: remove after debugging/testing stub + EHJumpProbeProlog + +#ifdef _DEBUG + ;; + ;; If we get here, then we have been hijacked for a real GC, and our SyncState must + ;; reflect that we've been requested to synchronize. + + ldr x1, =RhpTrapThreads + ldr w1, [x1] + tbnz x1, #TrapThreadsFlags_TrapThreads_Bit, %0 + + bl RhDebugBreak +0 +#endif ;; _DEBUG + + mov x4, x2 + WaitForGCCompletion + + EHJumpProbeEpilog + NESTED_END RhpGCProbeForEHJump + +#ifdef FEATURE_GC_STRESS +;; +;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. +;; +;; Register state on entry: +;; x1: reference to the exception object. +;; x2: thread +;; Non-volatile registers are all already correct for return to the caller. +;; The stack is as if we have tail called to this function (lr points to return address). +;; +;; Register state on exit: +;; x0: reference to the exception object +;; + NESTED_ENTRY RhpGCStressProbeForEHJump + brk 0xf000 ;; TODO: remove after debugging/testing stub + EHJumpProbeProlog + + bl $REDHAWKGCINTERFACE__STRESSGC + + EHJumpProbeEpilog + NESTED_END RhpGCStressProbeForEHJump +#endif ;; FEATURE_GC_STRESS +#endif ;; 0 + +#ifdef FEATURE_GC_STRESS +;; +;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. +;; + LEAF_ENTRY RhpSuppressGcStress + INLINE_GETTHREAD x9, x10 + add x9, x9, #OFFSETOF__Thread__m_ThreadStateFlags +Retry + ldxr w10, [x9] + orr w10, w10, #TSF_SuppressGcStress + stxr w11, w10, [x9] + cbz w11, Success + b Retry + +Success + ret + LEAF_END RhpSuppressGcStress +#endif ;; FEATURE_GC_STRESS + + INLINE_GETTHREAD_CONSTANT_POOL + + end + diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/GetThread.asm b/src/coreclr/src/nativeaot/Runtime/arm64/GetThread.asm new file mode 100644 index 0000000000000..7c01e66453385 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/GetThread.asm @@ -0,0 +1,29 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "AsmMacros.h" + + TEXTAREA + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpGetThread +;; +;; +;; INPUT: none +;; +;; OUTPUT: x9: Thread pointer +;; +;; MUST PRESERVE ARGUMENT REGISTERS +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + LEAF_ENTRY RhpGetThread + ;; x9 = GetThread(), TRASHES xip0 (which can be used as an intra-procedure-call scratch register) + INLINE_GETTHREAD x9, xip0 + ret + LEAF_END +FASTCALL_ENDFUNC + + INLINE_GETTHREAD_CONSTANT_POOL + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/Interlocked.S b/src/coreclr/src/nativeaot/Runtime/arm64/Interlocked.S new file mode 100644 index 0000000000000..755b5fd3d302b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/Interlocked.S @@ -0,0 +1,42 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg32AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// x0 = destination address +// w1 = value +// w2 = comparand +LEAF_ENTRY RhpLockCmpXchg32, _TEXT + mov x8, x0 // Save value of x0 into x8 as x0 is used for the return value +ALTERNATE_ENTRY RhpLockCmpXchg32AVLocation +1: // loop + ldaxr w0, [x8] // w0 = *x8 + cmp w0, w2 + bne 2f // if (w0 != w2) goto exit + stlxr w9, w1, [x8] // if (w0 == w2) { try *x8 = w1 and goto loop if failed or goto exit } + cbnz w9, 1b +2: // exit + ret +LEAF_END RhpLockCmpXchg32, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg64AVLocation +// - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +// x0 = destination address +// x1 = value +// x2 = comparand +LEAF_ENTRY RhpLockCmpXchg64, _TEXT + mov x8, x0 // Save value of x0 into x8 as x0 is used for the return value +ALTERNATE_ENTRY RhpLockCmpXchg64AVLocation +1: // loop + ldaxr x0, [x8] // x0 = *x8 + cmp x0, x2 + bne 2f // if (x0 != x2) goto exit + stlxr w9, x1, [x8] // if (x0 == x2) { try *x8 = x1 and goto loop if failed or goto exit } + cbnz w9, 1b +2: // exit + ret +LEAF_END RhpLockCmpXchg64, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.S b/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.asm b/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.asm new file mode 100644 index 0000000000000..bd9cbb4e882c1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/InteropThunksHelpers.asm @@ -0,0 +1,91 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
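+
+;; A note on the RhpLockCmpXchg32/RhpLockCmpXchg64 helpers added in Interlocked.S
+;; above: they are plain LL/SC compare-exchange loops with acquire/release
+;; ordering (ldaxr/stlxr). A hedged C++ equivalent of the 32-bit flavour,
+;; returning the originally observed value just as the helper does in w0:
+;;
+;;     #include <atomic>
+;;     #include <cstdint>
+;;     uint32_t LockCmpXchg32Sketch(std::atomic<uint32_t>* dest, uint32_t value, uint32_t comparand)
+;;     {
+;;         uint32_t observed = comparand;
+;;         dest->compare_exchange_strong(observed, value,
+;;                                       std::memory_order_acq_rel,    // success: ldaxr + stlxr
+;;                                       std::memory_order_acquire);   // failure: only the ldaxr happened
+;;         return observed;
+;;     }
+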
+ + +#include "ksarm64.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +__tls_array equ 0x58 ;; offsetof(TEB, ThreadLocalStoragePointer) + +POINTER_SIZE equ 0x08 + +;; TLS variables + AREA |.tls$|, DATA +ThunkParamSlot % 0x8 + + TEXTAREA + + EXTERN _tls_index + + ;; Section relocs are 32 bits. Using an extra DCD initialized to zero for 8-byte alignment. +__SECTIONREL_ThunkParamSlot + DCD ThunkParamSlot + RELOC 8, ThunkParamSlot ;; SECREL + DCD 0 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ;; + ;; RhCommonStub + ;; + ;; INPUT: xip0: thunk's data block + ;; + ;; TRASHES: x9, x10, x11, xip0 + ;; + LEAF_ENTRY RhCommonStub + ;; There are arbitrary callers passing arguments with arbitrary signatures. + ;; Custom calling convention: + ;; xip0 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + + ;; Save context data into the ThunkParamSlot thread-local variable + ;; A pointer to the delegate and function pointer for open static delegate should have been saved in the thunk's context cell during thunk allocation + ldr x10, =_tls_index + ldr w10, [x10] + ldr x9, [xpr, #__tls_array] + ldr x9, [x9, x10 lsl #3] ;; x9 <- our TLS base + + ;; x9 = base address of TLS data + ;; x10 = trashed + ;; xip0 = address of context cell in thunk's data + + ;; store thunk address in thread static + ldr x10, [xip0] + ldr x11, =__SECTIONREL_ThunkParamSlot + ldr x11, [x11] + str x10, [x9, x11] ;; ThunkParamSlot <- context slot data + + ;; Now load the target address and jump to it. + ldr xip0, [xip0, #POINTER_SIZE] + br xip0 + + LEAF_END RhCommonStub + + ;; + ;; IntPtr RhGetCommonStubAddress() + ;; + LEAF_ENTRY RhGetCommonStubAddress + ldr x0, =RhCommonStub + ret + LEAF_END RhGetCommonStubAddress + + + ;; + ;; IntPtr RhGetCurrentThunkContext() + ;; + LEAF_ENTRY RhGetCurrentThunkContext + + ldr x1, =_tls_index + ldr w1, [x1] + ldr x0, [xpr, #__tls_array] + ldr x0, [x0, x1 lsl #3] ;; x0 <- our TLS base + + ldr x1, =__SECTIONREL_ThunkParamSlot + ldr x1, [x1] + ldr x0, [x0, x1] ;; x0 <- ThunkParamSlot + + ret + + LEAF_END RhGetCurrentThunkContext + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.S new file mode 100644 index 0000000000000..53616c2269615 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.S @@ -0,0 +1,2 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.asm new file mode 100644 index 0000000000000..85c5d1e2ffd34 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/MiscStubs.asm @@ -0,0 +1,244 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + EXTERN memcpy + EXTERN memcpyGCRefs + EXTERN memcpyGCRefsWithWriteBarrier + EXTERN memcpyAnyWithWriteBarrier + EXTERN GetClasslibCCtorCheck + + TEXTAREA + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. 
+;; +;; Input: +;; x0 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers and the condition codes may be trashed. +;; + LEAF_ENTRY RhpCheckCctor + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + ldr w12, [x0, #OFFSETOF__StaticClassConstructionContext__m_initialized] + cmp w12, #1 + bne RhpCheckCctor__SlowPath + ret +RhpCheckCctor__SlowPath + mov x1, x0 + b RhpCheckCctor2 ; tail-call the check cctor helper that actually has an implementation to call + ; the cctor + + LEAF_END RhpCheckCctor + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; x0 : Value that must be preserved in this register across the cctor check. +;; x1 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than x0 may be trashed and the condition codes may also be trashed. +;; + LEAF_ENTRY RhpCheckCctor2 + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + ldr w12, [x1, #OFFSETOF__StaticClassConstructionContext__m_initialized] + cmp w12, #1 + bne RhpCheckCctor2__SlowPath + ret + + LEAF_END RhpCheckCctor2 + +;; +;; Slow path helper for RhpCheckCctor. +;; +;; Input: +;; x0 : Value that must be preserved in this register across the cctor check. +;; x1 : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than x0 may be trashed and the condition codes may also be trashed. +;; + NESTED_ENTRY RhpCheckCctor2__SlowPath + + ;; Need to preserve x0, x1 and lr across helper call. fp is also pushed to keep the stack 16 byte aligned. + PROLOG_SAVE_REG_PAIR fp, lr, #-0x20! + stp x0, x1, [sp, #0x10] + + ;; Call a C++ helper to retrieve the address of the classlib callback. The caller's return address is + ;; passed as the argument to the helper; it's an address in the module and is used by the helper to + ;; locate the classlib. + mov x0, lr + bl GetClasslibCCtorCheck + + ;; X0 now contains the address of the classlib method to call. The single argument is the context + ;; structure address currently in stashed on the stack. Clean up and tail call to the classlib + ;; callback so we're not on the stack should a GC occur (so we don't need to worry about transition + ;; frames). + mov x12, x0 + ldp x0, x1, [sp, #0x10] + EPILOG_RESTORE_REG_PAIR fp, lr, #0x20! + ;; tail-call the class lib cctor check function. This function is required to return its first + ;; argument, so that x0 can be preserved. 
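Taken together, the fast path of RhpCheckCctor/RhpCheckCctor2 above is just an unsynchronized read of the context's m_initialized field, with all synchronization pushed into the classlib callback, which must hand back its first argument so the preserved register survives. A C++ sketch of that overall shape, using a minimal stand-in for the construction context (only the field named in the comments above is modeled; the callback type is hypothetical):

    #include <cstdint>

    struct CctorContextSketch          // minimal stand-in; the real context has more fields
    {
        uint32_t m_initialized;        // 1 once the cctor has run; other values reserved for the classlib
    };

    using ClasslibCctorCheckSketch = void* (*)(void* preservedArg, CctorContextSketch* ctx);

    // Mirrors the assembly: unsynchronized fast-path check, then a call into the classlib
    // helper, which synchronizes, re-checks, runs the cctor if needed, and returns preservedArg.
    void* CheckCctorSketch(void* preservedArg, CctorContextSketch* ctx,
                           ClasslibCctorCheckSketch classlibCheck)
    {
        if (ctx->m_initialized == 1)
            return preservedArg;                      // fast path: cctor already ran
        return classlibCheck(preservedArg, ctx);      // slow path
    }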
+ EPILOG_NOP br x12 + + NESTED_END RhpCheckCctor__SlowPath2 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteNoGCRefs(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; + + LEAF_ENTRY RhpCopyMultibyteNoGCRefs + + ; x0 dest + ; x1 src + ; x2 count + + cbz x2, NothingToCopy_NoGCRefs ; check for a zero-length copy + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation + ldrb wzr, [x1] + + ; tail-call to plain-old-memcpy + b memcpy + +NothingToCopy_NoGCRefs + ; dest is already in x0 + ret + + LEAF_END + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyte(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; + + LEAF_ENTRY RhpCopyMultibyte + + ; x0 dest + ; x1 src + ; x2 count + + ; check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyMultibyte + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation + ldrb wzr, [x1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyGCRefs + +NothingToCopy_RhpCopyMultibyte + ; dest is already still in x0 + ret + + LEAF_END + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy +;; + + LEAF_ENTRY RhpCopyMultibyteWithWriteBarrier + + ; x0 dest + ; x1 src + ; x2 count + + ; check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyMultibyteWithWriteBarrier + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. 
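All of the copy wrappers in this file share that shape: bail out on a zero-length copy, deliberately touch one byte of the destination and source at labeled addresses so any fault surfaces at a known PC, then tail-call the appropriate memcpy flavor. A rough C++ rendering of the control flow is below; the probe-at-a-known-address part has no direct C++ equivalent, so the sketch only preserves the ordering:

    #include <cstddef>
    #include <cstring>

    // Sketch only: the real helpers fault at dedicated ALTERNATE_ENTRY labels so the EH
    // subsystem can attribute the AV to the managed caller; plain C++ cannot express that.
    void* CopyMultibyteNoGCRefsSketch(void* dest, const void* src, size_t count)
    {
        if (count == 0)
            return dest;                                               // nothing to copy

        volatile char probeDest = *static_cast<const char*>(dest);     // probe dest (may fault)
        volatile char probeSrc  = *static_cast<const char*>(src);      // probe src (may fault)
        (void)probeDest; (void)probeSrc;

        return memcpy(dest, src, count);                               // the assembly tail-calls here
    }

The GC-aware variants differ only in which copy routine they tail-call after the probes.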
+ ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation + ldrb wzr, [x1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyGCRefsWithWriteBarrier + +NothingToCopy_RhpCopyMultibyteWithWriteBarrier + ; dest is already still in x0 + ret + LEAF_END + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* RhpCopyAnyWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy if it contained GC pointers +;; + + LEAF_ENTRY RhpCopyAnyWithWriteBarrier + + ; x0 dest + ; x1 src + ; x2 count + + ; check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyAnyWithWriteBarrier + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation + ldrb wzr, [x1] + + ; tail-call to the GC-safe memcpy implementation + b memcpyAnyWithWriteBarrier + +NothingToCopy_RhpCopyAnyWithWriteBarrier + ; dest is already still in x0 + ret + + LEAF_END + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.S b/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.asm b/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.asm new file mode 100644 index 0000000000000..e4db7d65cb4f3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.asm @@ -0,0 +1,301 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + + IMPORT RhpReversePInvokeBadTransition + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn +;; +;; +;; INPUT: none +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForSuspend + + ;; FP and LR registers + PROLOG_SAVE_REG_PAIR fp, lr, #-0xA0! 
;; Push down stack pointer and store FP and LR + + ;; Need to save argument registers x0-x7 and the return buffer register x8 + ;; Also save x9 which may be used for saving indirect call target + stp x0, x1, [sp, #0x10] + stp x2, x3, [sp, #0x20] + stp x4, x5, [sp, #0x30] + stp x6, x7, [sp, #0x40] + stp x8, x9, [sp, #0x50] + + ;; Save float argument registers as well since they're volatile + stp d0, d1, [sp, #0x60] + stp d2, d3, [sp, #0x70] + stp d4, d5, [sp, #0x80] + stp d6, d7, [sp, #0x90] + + bl RhpWaitForSuspend2 + + ;; Restore floating point registers + ldp d0, d1, [sp, #0x60] + ldp d2, d3, [sp, #0x70] + ldp d4, d5, [sp, #0x80] + ldp d6, d7, [sp, #0x90] + + ;; Restore the argument registers + ldp x0, x1, [sp, #0x10] + ldp x2, x3, [sp, #0x20] + ldp x4, x5, [sp, #0x30] + ldp x6, x7, [sp, #0x40] + ldp x8, x9, [sp, #0x50] + + ;; Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR fp, lr, #0xA0! + EPILOG_RETURN + + NESTED_END RhpWaitForSuspend + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGCNoAbort +;; +;; +;; INPUT: x9: transition frame +;; +;; TRASHES: None +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForGCNoAbort + + ;; FP and LR registers + PROLOG_SAVE_REG_PAIR fp, lr, #-0x40! ;; Push down stack pointer and store FP and LR + + ;; Save the integer return registers, as well as the floating return registers + stp x0, x1, [sp, #0x10] + stp d0, d1, [sp, #0x20] + stp d2, d3, [sp, #0x30] + + ldr x0, [x9, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + ldr w0, [x0, #OFFSETOF__Thread__m_ThreadStateFlags] + tbnz x0, #TSF_DoNotTriggerGc_Bit, Done + + mov x0, x9 ; passing transition frame in x0 + bl RhpWaitForGC2 + +Done + ldp x0, x1, [sp, #0x10] + ldp d0, d1, [sp, #0x20] + ldp d2, d3, [sp, #0x30] + EPILOG_RESTORE_REG_PAIR fp, lr, #0x40! + EPILOG_RETURN + + NESTED_END RhpWaitForGCNoAbort + + EXTERN RhpThrowHwEx + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGC +;; +;; +;; INPUT: x9: transition frame +;; +;; TRASHES: x0, x1, x10 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpWaitForGC + + PROLOG_SAVE_REG_PAIR fp, lr, #-0x10! + + ldr x10, =RhpTrapThreads + ldr w10, [x10] + tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait + bl RhpWaitForGCNoAbort +NoWait + tbz x10, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort + ldr x10, [x9, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tbz x10, #PTFF_THREAD_ABORT_BIT, NoAbort + + EPILOG_RESTORE_REG_PAIR fp, lr, #0x10! + EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT + EPILOG_NOP mov x1, lr ; hijack target address as exception PC + EPILOG_NOP b RhpThrowHwEx + +NoAbort + EPILOG_RESTORE_REG_PAIR fp, lr, #0x10! 
+ EPILOG_RETURN + + NESTED_END RhpWaitForGC + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvoke +;; +;; IN: x9: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 8: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;; PRESERVES: x0 - x8 -- need to preserve these because the caller assumes they aren't trashed +;; +;; TRASHES: x10, x11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + LEAF_ENTRY RhpReversePInvoke + + INLINE_GETTHREAD x10, x11 ; x10 = Thread, x11 trashed + str x10, [x9, #8] ; save Thread pointer for RhpReversePInvokeReturn + + ;; x9 = reverse pinvoke frame + ;; x10 = thread + ;; x11 = scratch + + ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags] + tbz x11, #TSF_Attached_Bit, AttachThread + +ThreadAttached + ;; + ;; Check for the correct mode. This is accessible via various odd things that we cannot completely + ;; prevent such as : + ;; 1) Registering a reverse pinvoke entrypoint as a vectored exception handler + ;; 2) Performing a managed delegate invoke on a reverse pinvoke delegate. + ;; + ldr x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + cbz x11, CheckBadTransition + + ;; Save previous TransitionFrame prior to making the mode transition so that it is always valid + ;; whenever we might attempt to hijack this thread. + str x11, [x9] + + str xzr, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + dmb ish + + ldr x11, =RhpTrapThreads + ldr w11, [x11] + tbnz x11, #TrapThreadsFlags_TrapThreads_Bit, TrapThread + + ret + +CheckBadTransition + ;; Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. This allows us to have + ;; [UnmanagedCallersOnly] methods that are called via the "restricted GC callouts" as well as from native, + ;; which is necessary because the methods are CCW vtable methods on interfaces passed to native. + ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags] + tbz x11, #TSF_DoNotTriggerGc_Bit, BadTransition + + ;; zero-out our 'previous transition frame' save slot + mov x11, #0 + str x11, [x9] + + ;; nothing more to do + ret + +TrapThread + ;; put the previous frame back (sets us back to preemptive mode) + ldr x11, [x9] + str x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + dmb ish + +AttachThread + ; passing address of reverse pinvoke frame in x9 + b RhpReversePInvokeAttachOrTrapThread + +BadTransition + mov x0, lr ; arg <- return address + b RhpReversePInvokeBadTransition + + LEAF_END RhpReversePInvoke + + INLINE_GETTHREAD_CONSTANT_POOL + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke +;; +;; +;; INPUT: x9: address of reverse pinvoke frame +;; +;; PRESERVES: x0-x8 -- need to preserve these because the caller assumes they aren't trashed +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread + + ;; FP and LR registers + PROLOG_SAVE_REG_PAIR fp, lr, #-0xA0! 
;; Push down stack pointer and store FP and LR + + ;; Need to save argument registers x0-x7 and the return buffer register x8 (twice for 16B alignment) + stp x0, x1, [sp, #0x10] + stp x2, x3, [sp, #0x20] + stp x4, x5, [sp, #0x30] + stp x6, x7, [sp, #0x40] + stp x8, x8, [sp, #0x50] + + ;; Save float argument registers as well since they're volatile + stp d0, d1, [sp, #0x60] + stp d2, d3, [sp, #0x70] + stp d4, d5, [sp, #0x80] + stp d6, d7, [sp, #0x90] + + mov x0, x9 ; passing reverse pinvoke frame pointer in x0 + bl RhpReversePInvokeAttachOrTrapThread2 + + ;; Restore floating point registers + ldp d0, d1, [sp, #0x60] + ldp d2, d3, [sp, #0x70] + ldp d4, d5, [sp, #0x80] + ldp d6, d7, [sp, #0x90] + + ;; Restore the argument registers + ldp x0, x1, [sp, #0x10] + ldp x2, x3, [sp, #0x20] + ldp x4, x5, [sp, #0x30] + ldp x6, x7, [sp, #0x40] + ldr x8, [sp, #0x50] + + ;; Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR fp, lr, #0xA0! + EPILOG_RETURN + + NESTED_END RhpReversePInvokeTrapThread + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeReturn +;; +;; IN: x9: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 8: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;; TRASHES: x10, x11 +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + LEAF_ENTRY RhpReversePInvokeReturn + + ldp x10, x11, [x9] + + ;; x10: previous M->U transition frame + ;; x11: thread pointer + + str x10, [x11, #OFFSETOF__Thread__m_pTransitionFrame] + dmb ish + + ldr x10, =RhpTrapThreads + ldr w10, [x10] + tbnz x10, #TrapThreadsFlags_TrapThreads_Bit, RareTrapThread + + ret + +RareTrapThread + b RhpWaitForSuspend + + LEAF_END RhpReversePInvokeReturn + + INLINE_GETTHREAD_CONSTANT_POOL + + end diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.S b/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.S new file mode 100644 index 0000000000000..01ed602a761cf --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.S @@ -0,0 +1,6 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include + +// TODO: Implement Arm64 support diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.asm b/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.asm new file mode 100644 index 0000000000000..956bcdb4d013f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.asm @@ -0,0 +1,116 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + EXTERN RhpCidResolve + EXTERN RhpUniversalTransition_DebugStepTailCall + + ;; Macro that generates code to check a single cache entry. + MACRO + CHECK_CACHE_ENTRY $entry + ;; Check a single entry in the cache. + ;; x9 : Cache data structure. Also used for target address jump. 
+ ;; x10 : Instance EEType* + ;; x11 : Trashed + ldr x11, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16))] + cmp x10, x11 + bne %ft0 ;; Jump to label '0' + ldr x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + ($entry * 16) + 8)] + br x9 +0 ;; Label '0' + MEND + + +;; +;; Macro that generates a stub consuming a cache with the given number of entries. +;; + GBLS StubName + + MACRO + DEFINE_INTERFACE_DISPATCH_STUB $entries + +StubName SETS "RhpInterfaceDispatch$entries" + + NESTED_ENTRY $StubName + + ;; xip1 currently holds the indirection cell address. We need to get the cache structure instead. + ldr x9, [xip1, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in x0. + ldr x10, [x0] + + GBLA CurrentEntry +CurrentEntry SETA 0 + + WHILE CurrentEntry < $entries + CHECK_CACHE_ENTRY CurrentEntry +CurrentEntry SETA CurrentEntry + 1 + WEND + + ;; xip1 still contains the indirection cell address. + b RhpInterfaceDispatchSlow + + NESTED_END $StubName + + MEND + +;; +;; Define all the stub routines we currently need. +;; + DEFINE_INTERFACE_DISPATCH_STUB 1 + DEFINE_INTERFACE_DISPATCH_STUB 2 + DEFINE_INTERFACE_DISPATCH_STUB 4 + DEFINE_INTERFACE_DISPATCH_STUB 8 + DEFINE_INTERFACE_DISPATCH_STUB 16 + DEFINE_INTERFACE_DISPATCH_STUB 32 + DEFINE_INTERFACE_DISPATCH_STUB 64 + + +;; +;; Initial dispatch on an interface when we don't have a cache yet. +;; + LEAF_ENTRY RhpInitialInterfaceDispatch + ;; Just tail call to the cache miss helper. + b RhpInterfaceDispatchSlow + LEAF_END RhpInitialInterfaceDispatch + +;; +;; Stub dispatch routine for dispatch to a vtable slot +;; + LEAF_ENTRY RhpVTableOffsetDispatch + ;; xip1 has the interface dispatch cell address in it. + ;; load x12 to point to the vtable offset (which is stored in the m_pCache field). + ldr x12, [xip1, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the EEType from the object instance in x0, and add it to the vtable offset + ;; to get the address in the vtable of what we want to dereference + ldr x13, [x0] + add x12, x12, x13 + + ;; Load the target address of the vtable into x12 + ldr x12, [x12] + + br x12 + LEAF_END RhpVTableOffsetDispatch + +;; +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; + LEAF_ENTRY RhpInterfaceDispatchSlow + ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + ;; xip1 has the interface dispatch cell address in it. + ;; Calling convention of the universal thunk is: + ;; xip0: contains target address for the thunk to call + ;; xip1: contains parameter of the thunk's target + ldr xip0, =RhpCidResolve + b RhpUniversalTransition_DebugStepTailCall + LEAF_END RhpInterfaceDispatchSlow + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/ThunkPoolThunks.asm b/src/coreclr/src/nativeaot/Runtime/arm64/ThunkPoolThunks.asm new file mode 100644 index 0000000000000..5306cf92ee27b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/ThunkPoolThunks.asm @@ -0,0 +1,334 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
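The interface dispatch stubs generated in StubDispatch.asm above probe a small inline cache: each 16-byte entry pairs an expected type with a target address, and a complete miss falls through to RhpInterfaceDispatchSlow. A hedged C++ sketch of that probe is below; the cache layout is a guess at the shape implied by the 16-byte entry stride, not the runtime's real InterfaceDispatchCache definition:

    #include <cstddef>

    struct DispatchCacheEntrySketch
    {
        const void* expectedType;    // instance EEType the entry was recorded for
        const void* target;          // code address to jump to on a hit
    };

    struct DispatchCacheSketch
    {
        size_t                   entryCount;   // really 1/2/4/.../64, baked into the stub
        DispatchCacheEntrySketch entries[1];
    };

    // Equivalent of the unrolled CHECK_CACHE_ENTRY sequence: compare the object's type
    // against each cached type and return the cached target on the first match.
    const void* ProbeDispatchCacheSketch(const DispatchCacheSketch* cache, const void* instanceType)
    {
        for (size_t i = 0; i < cache->entryCount; i++)
            if (cache->entries[i].expectedType == instanceType)
                return cache->entries[i].target;
        return nullptr;   // caller would then take the RhpInterfaceDispatchSlow path
    }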
+ +#include "ksarm64.h" + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; STUBS & DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +THUNK_CODESIZE equ 0x10 ;; 3 instructions, 4 bytes each (and we also have 4 bytes of padding) +THUNK_DATASIZE equ 0x10 ;; 2 qwords + +THUNK_POOL_NUM_THUNKS_PER_PAGE equ 0xFA ;; 250 thunks per page + +POINTER_SIZE equ 0x08 + + MACRO + NAMED_READONLY_DATA_SECTION $name, $areaAlias + AREA $areaAlias,DATA,READONLY +RO$name % 8 + MEND + + ;; This macro is used to declare the thunks data blocks. Unlike the macro above (which is just used for padding), + ;; this macro needs to assign labels to each data block, so we can address them using PC-relative addresses. + MACRO + NAMED_READWRITE_DATA_SECTION $name, $areaAlias, $pageIndex + AREA $areaAlias,DATA + THUNKS_DATA_PAGE_BLOCK $pageIndex + MEND + + MACRO + LOAD_DATA_ADDRESS $groupIndex, $index, $pageIndex + + ;; Set xip0 to the address of the current thunk's data block. This is done using labels. + adr xip0, label_$groupIndex_$index_P$pageIndex + MEND + + MACRO + JUMP_TO_COMMON $groupIndex, $index + ;; start : xip0 points to the current thunks first data cell in the data page + ;; set xip0 to begining of data page : xip0 <- xip0 - (THUNK_DATASIZE * current thunk's index) + ;; fix offset to point to last QWROD in page : xip1 <- [xip0 + PAGE_SIZE - POINTER_SIZE] + ;; tailcall to the location pointed at by the last qword in the data page + ldr xip1, [xip0, #(PAGE_SIZE - POINTER_SIZE - ($groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * $index))] + br xip1 + + brk 0xf000 ;; Stubs need to be 16-byte aligned for CFG table. Filling padding with a + ;; deterministic brk instruction, instead of having it just filled with zeros. + MEND + + MACRO + THUNK_LABELED_DATA_BLOCK $groupIndex, $index, $pageIndex + + ;; Each data block contains 2 qword cells. The data block is also labeled so it can be addressed + ;; using PC relative instructions +label_$groupIndex_$index_P$pageIndex + DCQ 0 + DCQ 0 + MEND + + MACRO + TenThunks $groupIndex, $pageIndex + + ;; Each thunk will load the address of its corresponding data (from the page that immediately follows) + ;; and call a common stub. The address of the common stub is setup by the caller (last qword + ;; in the thunks data section) depending on the 'kind' of thunks needed (interop, fat function pointers, etc...) + + ;; Each data block used by a thunk consists of two qword values: + ;; - Context: some value given to the thunk as context. Example for fat-fptrs: context = generic dictionary + ;; - Target : target code that the thunk eventually jumps to. 
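Each data block described above is just two pointer-sized cells, and the last qword of every data page holds the common stub that JUMP_TO_COMMON tail-calls. A small C++ sketch of that layout follows, with hypothetical names and an assumed 4 KB page size (the real value is the assembler's PAGE_SIZE):

    #include <cstddef>

    // Illustrative constants; the authoritative values are the assembler equ's above.
    constexpr size_t kThunkDataSizeSketch   = 0x10;    // THUNK_DATASIZE: two qword cells
    constexpr size_t kThunksPerPageSketch   = 0xFA;    // THUNK_POOL_NUM_THUNKS_PER_PAGE
    constexpr size_t kPageSizeAssumed       = 0x1000;  // assumption: 4 KB pages

    struct ThunkDataBlockSketch
    {
        void* context;   // e.g. generic dictionary for fat function pointers
        void* target;    // code the thunk ultimately jumps to
    };

    // Shape of one data page: 250 blocks, padding, and the common-stub pointer in the
    // page's final pointer-sized slot (the value JUMP_TO_COMMON loads and branches to).
    struct ThunkDataPageSketch
    {
        ThunkDataBlockSketch blocks[kThunksPerPageSketch];
        unsigned char        padding[kPageSizeAssumed
                                     - kThunksPerPageSketch * kThunkDataSizeSketch
                                     - sizeof(void*)];
        void*                commonStub;
    };

    static_assert(sizeof(ThunkDataPageSketch) == kPageSizeAssumed,
                  "sketch assumes blocks + padding + common-stub slot fill exactly one page");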
+ + LOAD_DATA_ADDRESS $groupIndex,0,$pageIndex + JUMP_TO_COMMON $groupIndex,0 + + LOAD_DATA_ADDRESS $groupIndex,1,$pageIndex + JUMP_TO_COMMON $groupIndex,1 + + LOAD_DATA_ADDRESS $groupIndex,2,$pageIndex + JUMP_TO_COMMON $groupIndex,2 + + LOAD_DATA_ADDRESS $groupIndex,3,$pageIndex + JUMP_TO_COMMON $groupIndex,3 + + LOAD_DATA_ADDRESS $groupIndex,4,$pageIndex + JUMP_TO_COMMON $groupIndex,4 + + LOAD_DATA_ADDRESS $groupIndex,5,$pageIndex + JUMP_TO_COMMON $groupIndex,5 + + LOAD_DATA_ADDRESS $groupIndex,6,$pageIndex + JUMP_TO_COMMON $groupIndex,6 + + LOAD_DATA_ADDRESS $groupIndex,7,$pageIndex + JUMP_TO_COMMON $groupIndex,7 + + LOAD_DATA_ADDRESS $groupIndex,8,$pageIndex + JUMP_TO_COMMON $groupIndex,8 + + LOAD_DATA_ADDRESS $groupIndex,9,$pageIndex + JUMP_TO_COMMON $groupIndex,9 + MEND + + MACRO + TenThunkDataBlocks $groupIndex, $pageIndex + + ;; Similar to the thunks stubs block, we declare the thunks data blocks here + + THUNK_LABELED_DATA_BLOCK $groupIndex, 0, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 1, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 2, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 3, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 4, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 5, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 6, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 7, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 8, $pageIndex + THUNK_LABELED_DATA_BLOCK $groupIndex, 9, $pageIndex + MEND + + MACRO + THUNKS_PAGE_BLOCK $pageIndex + + TenThunks 0, $pageIndex + TenThunks 1, $pageIndex + TenThunks 2, $pageIndex + TenThunks 3, $pageIndex + TenThunks 4, $pageIndex + TenThunks 5, $pageIndex + TenThunks 6, $pageIndex + TenThunks 7, $pageIndex + TenThunks 8, $pageIndex + TenThunks 9, $pageIndex + TenThunks 10, $pageIndex + TenThunks 11, $pageIndex + TenThunks 12, $pageIndex + TenThunks 13, $pageIndex + TenThunks 14, $pageIndex + TenThunks 15, $pageIndex + TenThunks 16, $pageIndex + TenThunks 17, $pageIndex + TenThunks 18, $pageIndex + TenThunks 19, $pageIndex + TenThunks 20, $pageIndex + TenThunks 21, $pageIndex + TenThunks 22, $pageIndex + TenThunks 23, $pageIndex + TenThunks 24, $pageIndex + MEND + + MACRO + THUNKS_DATA_PAGE_BLOCK $pageIndex + + TenThunkDataBlocks 0, $pageIndex + TenThunkDataBlocks 1, $pageIndex + TenThunkDataBlocks 2, $pageIndex + TenThunkDataBlocks 3, $pageIndex + TenThunkDataBlocks 4, $pageIndex + TenThunkDataBlocks 5, $pageIndex + TenThunkDataBlocks 6, $pageIndex + TenThunkDataBlocks 7, $pageIndex + TenThunkDataBlocks 8, $pageIndex + TenThunkDataBlocks 9, $pageIndex + TenThunkDataBlocks 10, $pageIndex + TenThunkDataBlocks 11, $pageIndex + TenThunkDataBlocks 12, $pageIndex + TenThunkDataBlocks 13, $pageIndex + TenThunkDataBlocks 14, $pageIndex + TenThunkDataBlocks 15, $pageIndex + TenThunkDataBlocks 16, $pageIndex + TenThunkDataBlocks 17, $pageIndex + TenThunkDataBlocks 18, $pageIndex + TenThunkDataBlocks 19, $pageIndex + TenThunkDataBlocks 20, $pageIndex + TenThunkDataBlocks 21, $pageIndex + TenThunkDataBlocks 22, $pageIndex + TenThunkDataBlocks 23, $pageIndex + TenThunkDataBlocks 24, $pageIndex + MEND + + + ;; + ;; The first thunks section should be 64K aligned because it can get + ;; mapped multiple times in memory, and mapping works on allocation + ;; granularity boundaries (we don't want to map more than what we need) + ;; + ;; The easiest way to do so is by having the thunks section at the + ;; first 64K aligned virtual address in the binary. 
We provide a section + ;; layout file to the linker to tell it how to layout the thunks sections + ;; that we care about. (ndp\rh\src\runtime\DLLs\app\mrt100_app_sectionlayout.txt) + ;; + ;; The PE spec says images cannot have gaps between sections (other + ;; than what is required by the section alignment value in the header), + ;; therefore we need a couple of padding data sections (otherwise the + ;; OS will not load the image). + ;; + + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment0, "|.pad0|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment1, "|.pad1|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment2, "|.pad2|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment3, "|.pad3|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment4, "|.pad4|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment5, "|.pad5|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment6, "|.pad6|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment7, "|.pad7|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment8, "|.pad8|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment9, "|.pad9|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment10, "|.pad10|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment11, "|.pad11|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment12, "|.pad12|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment13, "|.pad13|" + NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment14, "|.pad14|" + + ;; + ;; Declaring all the data section first since they have labels referenced by the stubs sections, to prevent + ;; compilation errors ("undefined symbols"). The stubs/data sections will be correctly laid out in the image + ;; using using the explicit layout configurations (ndp\rh\src\runtime\DLLs\mrt100_sectionlayout.txt) + ;; + NAMED_READWRITE_DATA_SECTION ThunkData0, "|.tkd0|", 0 + NAMED_READWRITE_DATA_SECTION ThunkData1, "|.tkd1|", 1 + NAMED_READWRITE_DATA_SECTION ThunkData2, "|.tkd2|", 2 + NAMED_READWRITE_DATA_SECTION ThunkData3, "|.tkd3|", 3 + NAMED_READWRITE_DATA_SECTION ThunkData4, "|.tkd4|", 4 + NAMED_READWRITE_DATA_SECTION ThunkData5, "|.tkd5|", 5 + NAMED_READWRITE_DATA_SECTION ThunkData6, "|.tkd6|", 6 + NAMED_READWRITE_DATA_SECTION ThunkData7, "|.tkd7|", 7 + + ;; + ;; Thunk Stubs + ;; NOTE: Keep number of blocks in sync with macro/constant named 'NUM_THUNK_BLOCKS' in: + ;; - ndp\FxCore\src\System.Private.CoreLib\System\Runtime\InteropServices\ThunkPool.cs + ;; - ndp\rh\src\tools\rhbind\zapimage.h + ;; + + LEAF_ENTRY ThunkPool, "|.tks0|" + THUNKS_PAGE_BLOCK 0 + LEAF_END ThunkPool + + LEAF_ENTRY ThunkPool1, "|.tks1|" + THUNKS_PAGE_BLOCK 1 + LEAF_END ThunkPool1 + + LEAF_ENTRY ThunkPool2, "|.tks2|" + THUNKS_PAGE_BLOCK 2 + LEAF_END ThunkPool2 + + LEAF_ENTRY ThunkPool3, "|.tks3|" + THUNKS_PAGE_BLOCK 3 + LEAF_END ThunkPool3 + + LEAF_ENTRY ThunkPool4, "|.tks4|" + THUNKS_PAGE_BLOCK 4 + LEAF_END ThunkPool4 + + LEAF_ENTRY ThunkPool5, "|.tks5|" + THUNKS_PAGE_BLOCK 5 + LEAF_END ThunkPool5 + + LEAF_ENTRY ThunkPool6, "|.tks6|" + THUNKS_PAGE_BLOCK 6 + LEAF_END ThunkPool6 + + LEAF_ENTRY ThunkPool7, "|.tks7|" + THUNKS_PAGE_BLOCK 7 + LEAF_END ThunkPool7 + + + ;; + ;; IntPtr RhpGetThunksBase() + ;; + ;; ARM64TODO: There is a bug in the arm64 assembler which ends up with mis-sorted Pdata entries + ;; for the functions in this file. As a work around, don't generate pdata for these small stubs. + ;; All the "No_PDATA" variants need to be removed after MASM bug 516396 is fixed. 
+ LEAF_ENTRY_NO_PDATA RhpGetThunksBase + ;; Return the address of the first thunk pool to the caller (this is really the base address) + ldr x0, =ThunkPool + ret + LEAF_END_NO_PDATA RhpGetThunksBase + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; General Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ;; + ;; int RhpGetNumThunksPerBlock() + ;; + LEAF_ENTRY_NO_PDATA RhpGetNumThunksPerBlock + mov x0, THUNK_POOL_NUM_THUNKS_PER_PAGE + ret + LEAF_END_NO_PDATA RhpGetNumThunksPerBlock + + ;; + ;; int RhpGetThunkSize() + ;; + LEAF_ENTRY_NO_PDATA RhpGetThunkSize + mov x0, THUNK_CODESIZE + ret + LEAF_END_NO_PDATA RhpGetThunkSize + + ;; + ;; int RhpGetNumThunkBlocksPerMapping() + ;; + LEAF_ENTRY_NO_PDATA RhpGetNumThunkBlocksPerMapping + mov x0, 8 + ret + LEAF_END_NO_PDATA RhpGetNumThunkBlocksPerMapping + + ;; + ;; int RhpGetThunkBlockSize + ;; + LEAF_ENTRY_NO_PDATA RhpGetThunkBlockSize + mov x0, PAGE_SIZE * 2 + ret + LEAF_END_NO_PDATA RhpGetThunkBlockSize + + ;; + ;; IntPtr RhpGetThunkDataBlockAddress(IntPtr thunkStubAddress) + ;; + LEAF_ENTRY_NO_PDATA RhpGetThunkDataBlockAddress + mov x12, PAGE_SIZE - 1 + bic x0, x0, x12 + mov x12, PAGE_SIZE + add x0, x0, x12 + ret + LEAF_END_NO_PDATA RhpGetThunkDataBlockAddress + + ;; + ;; IntPtr RhpGetThunkStubsBlockAddress(IntPtr thunkDataAddress) + ;; + LEAF_ENTRY_NO_PDATA RhpGetThunkStubsBlockAddress + mov x12, PAGE_SIZE - 1 + bic x0, x0, x12 + mov x12, PAGE_SIZE + sub x0, x0, x12 + ret + LEAF_END_NO_PDATA RhpGetThunkStubsBlockAddress + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.S b/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.asm b/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.asm new file mode 100644 index 0000000000000..f3df0ccf7ce84 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/UniversalTransition.asm @@ -0,0 +1,161 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
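The RhpGetThunkDataBlockAddress and RhpGetThunkStubsBlockAddress helpers in ThunkPoolThunks.asm above encode the pairing convention directly: every stubs page is immediately followed by its data page, so converting between the two is a mask to the page start plus or minus one page. A C++ sketch of the same arithmetic (page size assumed, as before):

    #include <cstdint>

    constexpr uintptr_t kPageSizeAssumed = 0x1000;   // assumption: matches the assembler's PAGE_SIZE

    // Mirror of RhpGetThunkDataBlockAddress: align the stub address down to its page,
    // then step forward one page to reach the paired data page.
    uintptr_t ThunkDataBlockFromStubSketch(uintptr_t thunkStubAddress)
    {
        return (thunkStubAddress & ~(kPageSizeAssumed - 1)) + kPageSizeAssumed;
    }

    // Mirror of RhpGetThunkStubsBlockAddress: the inverse mapping.
    uintptr_t ThunkStubsBlockFromDataSketch(uintptr_t thunkDataAddress)
    {
        return (thunkDataAddress & ~(kPageSizeAssumed - 1)) - kPageSizeAssumed;
    }

This is also why RhpGetThunkBlockSize reports PAGE_SIZE * 2: a "block" is one stubs page together with its data page.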
+ +#include "AsmMacros.h" + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + EXTERN RhpIntegerTrashValues + EXTERN RhpFpTrashValues +#endif ;; TRASH_SAVED_ARGUMENT_REGISTERS + +;; Padding to account for the odd number of saved integer registers +#define ALIGNMENT_PADDING_SIZE (8) + +#define COUNT_ARG_REGISTERS (9) +#define INTEGER_REGISTER_SIZE (8) +#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) + +;; Largest return block is 4 doubles +#define RETURN_BLOCK_SIZE (32) + +#define COUNT_FLOAT_ARG_REGISTERS (8) +#define FLOAT_REGISTER_SIZE (8) +#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define PUSHED_LR_SIZE (8) +#define PUSHED_FP_SIZE (8) + +;; +;; From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +;; +;; ALIGNMENT_PADDING_SIZE +;; ARGUMENT_REGISTERS_SIZE +;; RETURN_BLOCK_SIZE +;; FLOAT_ARG_REGISTERS_SIZE +;; PUSHED_LR_SIZE +;; PUSHED_FP_SIZE +;; + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_LR_SIZE + FLOAT_ARG_REGISTERS_SIZE) + +#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + \ + PUSHED_LR_SIZE + PUSHED_FP_SIZE) + +#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_LR_SIZE) +#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE) + +;; +;; RhpUniversalTransition +;; +;; At input to this function, x0-8, d0-7 and the stack may contain any number of arguments. +;; +;; In addition, there are 2 extra arguments passed in the intra-procedure-call scratch register: +;; xip0 will contain the managed function that is to be called by this transition function +;; xip1 will contain the pointer sized extra argument to the managed function +;; +;; When invoking the callee: +;; +;; x0 shall contain a pointer to the TransitionBlock +;; x1 shall contain the value that was in xip1 at entry to this function +;; +;; Frame layout is: +;; +;; {StackPassedArgs} ChildSP+0C0 CallerSP+000 +;; {AlignmentPad (0x8 bytes)} ChildSP+0B8 CallerSP-008 +;; {IntArgRegs (x0-x8) (0x48 bytes)} ChildSP+070 CallerSP-050 +;; {ReturnBlock (0x20 bytes)} ChildSP+050 CallerSP-070 +;; -- The base address of the Return block is the TransitionBlock pointer, the floating point args are +;; in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +;; layout of all pieces of the frame that lie at or above the pushed floating point registers. +;; {FpArgRegs (d0-d7) (0x40 bytes)} ChildSP+010 CallerSP-0B0 +;; {PushedLR} ChildSP+008 CallerSP-0B8 +;; {PushedFP} ChildSP+000 CallerSP-0C0 +;; +;; NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +;; must be updated as well. +;; +;; NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +;; knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +;; FpArgRegs. +;; +;; NOTE: The stack walker guarantees that conservative GC reporting will be applied to +;; everything between the base of the ReturnBlock and the top of the StackPassedArgs. +;; + + TEXTAREA + + MACRO + UNIVERSAL_TRANSITION $FunctionName + + NESTED_ENTRY Rhp$FunctionName + + ;; FP and LR registers + PROLOG_SAVE_REG_PAIR fp, lr, #-STACK_SIZE! 
;; Push down stack pointer and store FP and LR + + ;; Floating point registers + stp d0, d1, [sp, #(FLOAT_ARG_OFFSET )] + stp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)] + stp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)] + stp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)] + + ;; Space for return buffer data (0x40 bytes) + + ;; Save argument registers + stp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET )] + stp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)] + stp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)] + stp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)] + stp x8, xzr, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x40)] + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + ;; ARM64TODO +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + add x0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK ;; First parameter to target function is a pointer to the return block + mov x8, x0 ;; Arm64 calling convention: Address of return block shall be passed in x8 + mov x1, xip1 ;; Second parameter to target function + blr xip0 + + ;; We cannot make the label public as that tricks DIA stackwalker into thinking + ;; it's the beginning of a method. For this reason we export an auxiliary variable + ;; holding the address instead. + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom$FunctionName + + ;; Move the result (the target address) to x12 so it doesn't get overridden when we restore the + ;; argument registers. + mov x12, x0 + + ;; Restore floating point registers + ldp d0, d1, [sp, #(FLOAT_ARG_OFFSET )] + ldp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)] + ldp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)] + ldp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)] + + ;; Restore the argument registers + ldp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET )] + ldp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)] + ldp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)] + ldp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)] + ldr x8, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x40)] + + ;; Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR fp, lr, #STACK_SIZE! + + ;; Tailcall to the target address. + EPILOG_NOP br x12 + + NESTED_END Rhp$FunctionName + + MEND + + ; To enable proper step-in behavior in the debugger, we need to have two instances + ; of the thunk. For the first one, the debugger steps into the call in the function, + ; for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + + END diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.S b/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.S new file mode 100644 index 0000000000000..a14d99d7ef481 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.S @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement Unix write barriers +#include + +LEAF_ENTRY RhpAssignRef, _TEXT + str x1, [x0] + ret +LEAF_END RhpAssignRef, _TEXT + +LEAF_ENTRY RhpCheckedAssignRef, _TEXT + str x1, [x0] + ret +LEAF_END RhpCheckedAssignRef, _TEXT + +// +// RhpByRefAssignRef simulates movs instruction for object references. 
+// +// On entry: +// x0: address of ref-field (assigned to) +// x1: address of the data (source) +// x3: be trashed +// +// On exit: +// x0, x1 are incremented by 8, +// x3: trashed +// +LEAF_ENTRY RhpByRefAssignRef, _TEXT + ldr x3, [x1], #8 + str x3, [x0], #8 + ret +LEAF_END RhpByRefAssignRef, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.asm b/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.asm new file mode 100644 index 0000000000000..204c79d00c4a3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.asm @@ -0,0 +1,318 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; +;; Define the helpers used to implement the write barrier required when writing an object reference into a +;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +;; collection. +;; + +#include "AsmMacros.h" + + TEXTAREA + +;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used +;; during garbage collections to verify that object references where never written to the heap without using a +;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing +;; new references to the real heap. Since this can't be solved perfectly without critical sections around the +;; entire update process, we instead update the shadow location and then re-check the real location (as two +;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value +;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. +#ifdef WRITE_BARRIER_CHECK + + SETALIAS g_GCShadow, ?g_GCShadow@@3PEAEEA + SETALIAS g_GCShadowEnd, ?g_GCShadowEnd@@3PEAEEA + EXTERN $g_GCShadow + EXTERN $g_GCShadowEnd + +INVALIDGCVALUE EQU 0xCCCCCCCD + + MACRO + ;; On entry: + ;; $destReg: location to be updated + ;; $refReg: objectref to be stored + ;; + ;; On exit: + ;; x9,x10: trashed + ;; other registers are preserved + ;; + UPDATE_GC_SHADOW $destReg, $refReg + + ;; If g_GCShadow is 0, don't perform the check. + adrp x9, $g_GCShadow + ldr x9, [x9, $g_GCShadow] + cbz x9, %ft1 + + ;; Save $destReg since we're about to modify it (and we need the original value both within the macro and + ;; once we exit the macro). + mov x10, $destReg + + ;; Transform $destReg into the equivalent address in the shadow heap. + adrp x9, g_lowest_address + ldr x9, [x9, g_lowest_address] + subs $destReg, $destReg, x9 + blt %ft0 + + adrp x9, $g_GCShadow + ldr x9, [x9, $g_GCShadow] + add $destReg, $destReg, x9 + + adrp x9, $g_GCShadowEnd + ldr x9, [x9, $g_GCShadowEnd] + cmp $destReg, x9 + bgt %ft0 + + ;; Update the shadow heap. + str $refReg, [$destReg] + + ;; The following read must be strongly ordered wrt to the write we've just performed in order to + ;; prevent race conditions. + dmb ish + + ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. + mov x9, x10 + ldr x9, [x9] + cmp x9, $refReg + beq %ft0 + + ;; Someone went and updated the real heap. 
We need to invalidate the shadow location since we can't + ;; guarantee whose shadow update won. + MOVL64 x9, INVALIDGCVALUE, 0 + str x9, [$destReg] + +0 + ;; Restore original $destReg value + mov $destReg, x10 + +1 + MEND + +#else // WRITE_BARRIER_CHECK + + MACRO + UPDATE_GC_SHADOW $destReg, $refReg + MEND + +#endif // WRITE_BARRIER_CHECK + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +;; name of the register that points to the location to be updated and the name of the register that holds the +;; object reference (this should be in upper case as it's used in the definition of the name of the helper). + +;; Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for +;; some interlocked helpers that need an inline barrier. + MACRO + ;; On entry: + ;; $destReg: location to be updated + ;; $refReg: objectref to be stored + ;; + ;; On exit: + ;; $destReg: trashed + ;; x9: trashed + ;; + INSERT_UNCHECKED_WRITE_BARRIER_CORE $destReg, $refReg + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW $destReg, $refReg + + ;; We can skip the card table write if the reference is to + ;; an object not on the epehemeral segment. + adrp x9, g_ephemeral_low + ldr x9, [x9, g_ephemeral_low] + cmp $refReg, x9 + blt %ft0 + + adrp x9, g_ephemeral_high + ldr x9, [x9, g_ephemeral_high] + cmp $refReg, x9 + bge %ft0 + + ;; Set this object's card, if it hasn't already been set. + adrp x9, g_card_table + ldr x9, [x9, g_card_table] + add $destReg, x9, $destReg lsr #11 + + ;; Check that this card hasn't already been written. Avoiding useless writes is a big win on + ;; multi-proc systems since it avoids cache thrashing. + ldrb w9, [$destReg] + cmp x9, 0xFF + beq %ft0 + + mov x9, 0xFF + strb w9, [$destReg] + +0 + ;; Exit label + MEND + + MACRO + ;; On entry: + ;; $destReg: location to be updated + ;; $refReg: objectref to be stored + ;; + ;; On exit: + ;; $destReg: trashed + ;; x9: trashed + ;; + INSERT_CHECKED_WRITE_BARRIER_CORE $destReg, $refReg + + ;; The "check" of this checked write barrier - is $destReg + ;; within the heap? if no, early out. + adrp x9, g_lowest_address + ldr x9, [x9, g_lowest_address] + cmp $destReg, x9 + blt %ft0 + + adrp x9, g_highest_address + ldr x9, [x9, g_highest_address] + cmp $destReg, x9 + bgt %ft0 + + INSERT_UNCHECKED_WRITE_BARRIER_CORE $destReg, $refReg + +0 + ;; Exit label + MEND + +;; RhpCheckedAssignRef(Object** dst, Object* src) +;; +;; Write barrier for writes to objects that may reside +;; on the managed heap. +;; +;; On entry: +;; x0 : the destination address (LHS of the assignment). +;; May not be an object reference (hence the checked). +;; x1 : the object reference (RHS of the assignment). +;; On exit: +;; x1 : trashed +;; x9 : trashed + LEAF_ENTRY RhpCheckedAssignRef + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + ALTERNATE_ENTRY RhpCheckedAssignRefX1 + ALTERNATE_ENTRY RhpCheckedAssignRefX1AVLocation + + stlr x1, [x0] + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1 + + ret + + LEAF_END RhpCheckedAssignRef + +;; RhpAssignRef(Object** dst, Object* src) +;; +;; Write barrier for writes to objects that are known to +;; reside on the managed heap. 
+;; +;; On entry: +;; x0 : the destination address (LHS of the assignment). +;; x1 : the object reference (RHS of the assignment). +;; On exit: +;; x1 : trashed +;; x9 : trashed + LEAF_ENTRY RhpAssignRef + ALTERNATE_ENTRY RhpAssignRefAVLocation + ALTERNATE_ENTRY RhpAssignRefX1 + ALTERNATE_ENTRY RhpAssignRefX1AVLocation + + stlr x1, [x0] + + INSERT_UNCHECKED_WRITE_BARRIER_CORE x0, x1 + + ret + + LEAF_END RhpAssignRef + +;; Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon +;; successful updates. + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + +;; RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) +;; +;; Interlocked compare exchange on objectref. +;; +;; On entry: +;; x0: pointer to objectref +;; x1: exchange value +;; x2: comparand +;; +;; On exit: +;; x0: original value of objectref +;; x9: trashed +;; x10: trashed +;; + LEAF_ENTRY RhpCheckedLockCmpXchg + ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + +CmpXchgRetry + ;; Check location value is what we expect. + ldaxr x10, [x0] + cmp x10, x2 + bne CmpXchgNoUpdate + + ;; Current value matches comparand, attempt to update with the new value. + stlxr w9, x1, [x0] + cbnz w9, CmpXchgRetry + + ;; We've successfully updated the value of the objectref so now we need a GC write barrier. + ;; The following barrier code takes the destination in x0 and the value in x1 so the arguments are + ;; already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1 + +CmpXchgNoUpdate + ;; x10 still contains the original value. + mov x0, x10 + ret lr + + LEAF_END RhpCheckedLockCmpXchg + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation +;; - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + +;; RhpCheckedXchg(Object** destination, Object* value) +;; +;; Interlocked exchange on objectref. +;; +;; On entry: +;; x0: pointer to objectref +;; x1: exchange value +;; +;; On exit: +;; x0: original value of objectref +;; x9: trashed +;; x10: trashed +;; + LEAF_ENTRY RhpCheckedXchg + ALTERNATE_ENTRY RhpCheckedXchgAVLocation + +ExchangeRetry + ;; Read the existing memory location. + ldaxr x10, [x0] + + ;; Attempt to update with the new value. + stlxr w9, x1, [x0] + cbnz w9, ExchangeRetry + + ;; We've successfully updated the value of the objectref so now we need a GC write barrier. + ;; The following barrier code takes the destination in x0 and the value in x1 so the arguments are + ;; already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1 + + ;; x10 still contains the original value. + mov x0, x10 + ret + + LEAF_END RhpCheckedXchg + + end diff --git a/src/coreclr/src/nativeaot/Runtime/eetype.cpp b/src/coreclr/src/nativeaot/Runtime/eetype.cpp new file mode 100644 index 0000000000000..d18de90b889f8 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/eetype.cpp @@ -0,0 +1,166 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
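Stripped of the shadow-heap debugging logic, the write barrier core in WriteBarriers.asm above performs two range checks and at most one byte write: skip the card update when the stored reference is not in the ephemeral range, otherwise dirty the card for the destination (one card byte per 2 KB, which is what the lsr #11 computes), avoiding the store when the card is already set. A C++ sketch of the checked variant, treating the runtime globals as plain variables for illustration:

    #include <cstdint>

    // Stand-ins for the globals referenced by the assembly (declarations only, illustrative).
    extern uint8_t*  g_card_table;
    extern uintptr_t g_lowest_address, g_highest_address;    // bounds of the GC heap
    extern uintptr_t g_ephemeral_low,  g_ephemeral_high;     // bounds of the ephemeral segment

    void CheckedWriteBarrierSketch(void** dest, void* ref)
    {
        *dest = ref;                                          // the store itself (stlr in the asm)

        uintptr_t d = reinterpret_cast<uintptr_t>(dest);
        uintptr_t r = reinterpret_cast<uintptr_t>(ref);

        if (d < g_lowest_address || d > g_highest_address)    // "checked": dest may not be a heap location
            return;
        if (r < g_ephemeral_low || r >= g_ephemeral_high)     // only ephemeral references need a card
            return;

        uint8_t* card = g_card_table + (d >> 11);             // one card byte per 2 KB region
        if (*card != 0xFF)                                    // avoid redundant writes (cache thrashing)
            *card = 0xFF;
    }

The unchecked variant used by RhpAssignRef simply omits the first range check, since its destination is known to be on the managed heap.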
+#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "rhassert.h" +#include "rhbinder.h" +#include "eetype.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" + +#include "CommonMacros.inl" + +#pragma warning(disable:4127) // C4127: conditional expression is constant + +// Validate an EEType extracted from an object. +bool EEType::Validate(bool assertOnFail /* default: true */) +{ +#define REPORT_FAILURE() do { if (assertOnFail) { ASSERT_UNCONDITIONALLY("EEType::Validate check failed"); } return false; } while (false) + + // Deal with the most common case of a bad pointer without an exception. + if (this == NULL) + REPORT_FAILURE(); + + // EEType structures should be at least pointer aligned. + if (dac_cast(this) & (sizeof(TADDR)-1)) + REPORT_FAILURE(); + + // Verify object size is bigger than min_obj_size + size_t minObjSize = get_BaseSize(); + if (get_ComponentSize() != 0) + { + // If it is an array, we will align the size to the nearest pointer alignment, even if there are + // zero elements. Our strings take advantage of this. + minObjSize = (size_t)ALIGN_UP(minObjSize, sizeof(TADDR)); + } + if (minObjSize < (3 * sizeof(TADDR))) + REPORT_FAILURE(); + + switch (get_Kind()) + { + case CanonicalEEType: + { + // If the parent type is NULL this had better look like Object. + if (!IsInterface() && (m_RelatedType.m_pBaseType == NULL)) + { + if (IsRelatedTypeViaIAT() || + get_IsValueType() || + HasFinalizer() || + HasReferenceFields() || + HasGenericVariance()) + { + REPORT_FAILURE(); + } + } + break; + } + + case ClonedEEType: + { + // Cloned types must have a related type. + if (m_RelatedType.m_ppCanonicalTypeViaIAT == NULL) + REPORT_FAILURE(); + + // Either we're dealing with a clone of String or a generic type. We can tell the difference based + // on the component size. + switch (get_ComponentSize()) + { + case 0: + { + // Cloned generic type. + if (!IsRelatedTypeViaIAT()) + { + REPORT_FAILURE(); + } + break; + } + + case 2: + { + // Cloned string. + if (get_IsValueType() || + HasFinalizer() || + HasReferenceFields() || + HasGenericVariance()) + { + REPORT_FAILURE(); + } + + break; + } + + default: + // Apart from cloned strings we don't expected cloned types to have a component size. + REPORT_FAILURE(); + } + break; + } + + case ParameterizedEEType: + { + // The only parameter EETypes that can exist on the heap are arrays + + // Array types must have a related type. + if (m_RelatedType.m_pRelatedParameterType == NULL) + REPORT_FAILURE(); + + // Component size cannot be zero in this case. + if (get_ComponentSize() == 0) + REPORT_FAILURE(); + + if (get_IsValueType() || + HasFinalizer() || + HasGenericVariance()) + { + REPORT_FAILURE(); + } + + break; + } + + case GenericTypeDefEEType: + { + // We should never see uninstantiated generic type definitions here + // since we should never construct an object instance around them. + REPORT_FAILURE(); + } + + default: + // Should be unreachable. 
+ REPORT_FAILURE(); + } + +#undef REPORT_FAILURE + + return true; +} + +//----------------------------------------------------------------------------------------------------------- +EEType::Kinds EEType::get_Kind() +{ + return (Kinds)(m_usFlags & (UInt16)EETypeKindMask); +} + +//----------------------------------------------------------------------------------------------------------- +EEType * EEType::get_CanonicalEEType() +{ + // cloned EETypes must always refer to types in other modules + ASSERT(IsCloned()); + if (IsRelatedTypeViaIAT()) + return *PTR_PTR_EEType(reinterpret_cast(m_RelatedType.m_ppCanonicalTypeViaIAT)); + else + return PTR_EEType(reinterpret_cast(m_RelatedType.m_pCanonicalType)); // in the R2R case, the link is direct rather than indirect via the IAT +} + +//----------------------------------------------------------------------------------------------------------- +EEType * EEType::get_RelatedParameterType() +{ + ASSERT(IsParameterizedType()); + + if (IsRelatedTypeViaIAT()) + return *PTR_PTR_EEType(reinterpret_cast(m_RelatedType.m_ppRelatedParameterTypeViaIAT)); + else + return PTR_EEType(reinterpret_cast(m_RelatedType.m_pRelatedParameterType)); +} diff --git a/src/coreclr/src/nativeaot/Runtime/event.cpp b/src/coreclr/src/nativeaot/Runtime/event.cpp new file mode 100644 index 0000000000000..73ad5095008d5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/event.cpp @@ -0,0 +1,120 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "event.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" + +// +// ----------------------------------------------------------------------------------------------------------- +// +// CLR wrapper around events. This version directly uses Win32 events (there's no support for host +// interception). 
+// + +bool CLREventStatic::CreateManualEventNoThrow(bool bInitialState) +{ + m_hEvent = PalCreateEventW(NULL, TRUE, bInitialState, NULL); + m_fInitialized = true; + return IsValid(); +} + +bool CLREventStatic::CreateAutoEventNoThrow(bool bInitialState) +{ + m_hEvent = PalCreateEventW(NULL, FALSE, bInitialState, NULL); + m_fInitialized = true; + return IsValid(); +} + +bool CLREventStatic::CreateOSManualEventNoThrow(bool bInitialState) +{ + m_hEvent = PalCreateEventW(NULL, TRUE, bInitialState, NULL); + m_fInitialized = true; + return IsValid(); +} + +bool CLREventStatic::CreateOSAutoEventNoThrow(bool bInitialState) +{ + m_hEvent = PalCreateEventW(NULL, FALSE, bInitialState, NULL); + m_fInitialized = true; + return IsValid(); +} + +void CLREventStatic::CloseEvent() +{ + if (m_fInitialized && m_hEvent != INVALID_HANDLE_VALUE) + { + PalCloseHandle(m_hEvent); + m_hEvent = INVALID_HANDLE_VALUE; + } +} + +bool CLREventStatic::IsValid() const +{ + return m_fInitialized && m_hEvent != INVALID_HANDLE_VALUE; +} + +bool CLREventStatic::Set() +{ + if (!m_fInitialized) + return false; + return PalSetEvent(m_hEvent); +} + +bool CLREventStatic::Reset() +{ + if (!m_fInitialized) + return false; + return PalResetEvent(m_hEvent); +} + +uint32_t CLREventStatic::Wait(uint32_t dwMilliseconds, bool bAlertable, bool bAllowReentrantWait) +{ + UInt32 result = WAIT_FAILED; + + if (m_fInitialized) + { + bool disablePreemptive = false; + Thread * pCurThread = ThreadStore::GetCurrentThreadIfAvailable(); + + if (NULL != pCurThread) + { + if (pCurThread->IsCurrentThreadInCooperativeMode()) + { + pCurThread->EnablePreemptiveMode(); + disablePreemptive = true; + } + } + + result = PalCompatibleWaitAny(bAlertable, dwMilliseconds, 1, &m_hEvent, bAllowReentrantWait); + + if (disablePreemptive) + { + pCurThread->DisablePreemptiveMode(); + } + } + + return result; +} + +HANDLE CLREventStatic::GetOSEvent() +{ + if (!m_fInitialized) + return INVALID_HANDLE_VALUE; + return m_hEvent; +} diff --git a/src/coreclr/src/nativeaot/Runtime/event.h b/src/coreclr/src/nativeaot/Runtime/event.h new file mode 100644 index 0000000000000..b46b9e538207c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/event.h @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +class CLREventStatic +{ +public: + bool CreateManualEventNoThrow(bool bInitialState); + bool CreateAutoEventNoThrow(bool bInitialState); + bool CreateOSManualEventNoThrow(bool bInitialState); + bool CreateOSAutoEventNoThrow(bool bInitialState); + void CloseEvent(); + bool IsValid() const; + bool Set(); + bool Reset(); + uint32_t Wait(uint32_t dwMilliseconds, bool bAlertable, bool bAllowReentrantWait = false); + HANDLE GetOSEvent(); + +private: + HANDLE m_hEvent; + bool m_fInitialized; +}; diff --git a/src/coreclr/src/nativeaot/Runtime/eventtrace.cpp b/src/coreclr/src/nativeaot/Runtime/eventtrace.cpp new file mode 100644 index 0000000000000..fb8a2053d034c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/eventtrace.cpp @@ -0,0 +1,6503 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
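CLREventStatic above is a thin wrapper over PAL event handles, so its lifecycle is create, signal or wait, then CloseEvent. A hypothetical usage sketch, assuming the PAL headers that define INFINITE are already included as they are in this file:

void EventUsageSketch()
{
    CLREventStatic doneEvent;

    // Auto-reset event, initially unsignaled.
    if (!doneEvent.CreateAutoEventNoThrow(false))
        return; // creation failed; nothing to close

    doneEvent.Set(); // a producer signals completion

    // Wait drops cooperative mode first (see CLREventStatic::Wait above), so a thread
    // blocked on the handle cannot hold up a GC.
    uint32_t waitResult = doneEvent.Wait(INFINITE, /* bAlertable */ false);
    (void)waitResult;

    doneEvent.CloseEvent(); // returns the handle to the PAL
}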
+// +// File: eventtrace.cpp +// Abstract: This module implements Event Tracing support +// +// ============================================================================ + +#include "common.h" + +#ifdef FEATURE_REDHAWK +#include "gcenv.h" +#include "gcheaputilities.h" + +#include "daccess.h" + +#include "slist.h" +#include "varint.h" +#include "regdisplay.h" +#include "stackframeiterator.h" +#include "thread.h" +#include "rwlock.h" +#include "threadstore.h" +#include "threadstore.inl" +//#include "PalRedhawk.h" + +#define Win32EventWrite PalEventWrite +#else // !FEATURE_REDHAWK + +#include "eventtrace.h" +#include "winbase.h" +#include "contract.h" +#include "ex.h" +#include "dbginterface.h" +#define Win32EventWrite EventWrite + +// Flags used to store some runtime information for Event Tracing +BOOL g_fEEOtherStartup=FALSE; +BOOL g_fEEComActivatedStartup=FALSE; +LPCGUID g_fEEComObjectGuid=&GUID_NULL; + +BOOL g_fEEHostedStartup = FALSE; + +#endif // FEATURE_REDHAWK + +#include "eventtracepriv.h" + +#ifdef FEATURE_REDHAWK +volatile LONGLONG ETW::GCLog::s_l64LastClientSequenceNumber = 0; +#else // FEATURE_REDHAWK +Volatile ETW::GCLog::s_l64LastClientSequenceNumber = 0; +#endif // FEATURE_REDHAWK + +#ifndef FEATURE_REDHAWK + +//--------------------------------------------------------------------------------------- +// Helper macros to determine which version of the Method events to use +// +// The V2 versions of these events include the ReJITID, the V1 versions do not. +// Historically, when we version events, we'd just stop sending the old version and only +// send the new one. However, now that we have xperf in heavy use internally and soon to be +// used externally, we need to be a bit careful. In particular, we'd like to allow +// current xperf to continue working without knowledge of ReJITIDs, and allow future +// xperf to decode symbols in ReJITted functions. Thus, +// * During a first-JIT, only issue the existing V1 MethodLoad, etc. events (NOT v0, +// NOT v2). This event does not include a ReJITID, and can thus continue to be +// parsed by older decoders. +// * During a rejit, only issue the new V2 events (NOT v0 or v1), which will include a +// nonzero ReJITID. Thus, your unique key for a method extent would be MethodID + +// ReJITID + extent (hot/cold). These events will be ignored by older decoders +// (including current xperf) because of the version number, but xperf will be +// updated to decode these in the future. 
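The macros that follow implement the dispatch described above: the V1 event fires for a first JIT (no ReJITID, so existing decoders keep working) and the V2 event fires only for rejitted code, where the nonzero ReJITID becomes part of the method's identity. A minimal C++ illustration of the same pattern, with hypothetical stub fire functions standing in for the manifest-generated FireEtw* macros:

typedef unsigned long long ReJITID;

// Hypothetical stand-ins for the generated V1/V2 fire macros.
inline void FireMethodLoadV1Sketch(unsigned long long /*methodID*/) { }
inline void FireMethodLoadV2Sketch(unsigned long long /*methodID*/, ReJITID /*rejitID*/) { }

inline void FireMethodLoadV1OrV2Sketch(unsigned long long methodID, ReJITID rejitID)
{
    if (rejitID == 0)
        FireMethodLoadV1Sketch(methodID);          // first JIT: no ReJITID payload
    else
        FireMethodLoadV2Sketch(methodID, rejitID); // rejit: include the nonzero ReJITID
}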
+ +#define FireEtwMethodLoadVerbose_V1_or_V2(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodLoadVerbose_V1(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID); } \ + else \ + { FireEtwMethodLoadVerbose_V2(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodLoad_V1_or_V2(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodLoad_V1(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, clrInstanceID); } \ + else \ + { FireEtwMethodLoad_V2(ullMethodIdentifier, ullModuleID, ullMethodStartAddress, ulMethodSize, ulMethodToken, ulMethodFlags, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodUnloadVerbose_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodUnloadVerbose_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID); } \ + else \ + { FireEtwMethodUnloadVerbose_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodUnload_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodUnload_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID); } \ + else \ + { FireEtwMethodUnload_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodDCStartVerbose_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodDCStartVerbose_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID); } \ + else \ + { FireEtwMethodDCStartVerbose_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodDCStart_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodDCStart_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID); } \ + else \ + 
{ FireEtwMethodDCStart_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodDCEndVerbose_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodDCEndVerbose_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID); } \ + else \ + { FireEtwMethodDCEndVerbose_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, szDtraceOutput1, szDtraceOutput2, szDtraceOutput3, clrInstanceID, rejitID); } \ +} + +#define FireEtwMethodDCEnd_V1_or_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID) \ +{ \ + if (rejitID == 0) \ + { FireEtwMethodDCEnd_V1(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID); } \ + else \ + { FireEtwMethodDCEnd_V2(ullMethodIdentifier, ullModuleID, ullColdMethodStartAddress, ulColdMethodSize, ulMethodToken, ulColdMethodFlags, clrInstanceID, rejitID); } \ +} + +// Module load / unload events: +// There is no precedent here for using GUIDs in Mac events, and it's doubtful any +// of the new PDB fields for the V2 Module events are at all useful on the Mac anyway. So +// stick with V1 module events on the Mac. + +#ifdef FEATURE_DTRACE +#define FireEtwModuleLoad_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleLoad_V1(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId) +#define FireEtwModuleUnload_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleUnload_V1(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId) +#define FireEtwModuleDCStart_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleDCStart_V1(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId) +#define FireEtwModuleDCEnd_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleDCEnd_V1(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId) +#else // FEATURE_DTRACE +#define FireEtwModuleLoad_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleLoad_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, 
szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) +#define FireEtwModuleUnload_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleUnload_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) +#define FireEtwModuleDCStart_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleDCStart_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) +#define FireEtwModuleDCEnd_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) \ + FireEtwModuleDCEnd_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, clrInstanceId, ManagedPdbSignature, ManagedPdbAge, ManagedPdbPath, NativePdbSignature, NativePdbAge, NativePdbPath) +#endif // FEATURE_DTRACE + + +//--------------------------------------------------------------------------------------- +// +// Rather than checking the NGEN keyword on the runtime provider directly, use this +// helper that checks that the NGEN runtime provider keyword is enabled AND the +// OverrideAndSuppressNGenEvents keyword on the runtime provider is NOT enabled. +// +// OverrideAndSuppressNGenEvents allows controllers to set the expensive NGEN keyword for +// older runtimes (< 4.0) where NGEN PDB info is NOT available, while suppressing those +// expensive events on newer runtimes (>= 4.5) where NGEN PDB info IS available. Note +// that 4.0 has NGEN PDBS but unfortunately not the OverrideAndSuppressNGenEvents +// keyword, b/c NGEN PDBs were made publicly only after 4.0 shipped. So tools that need +// to consume both <4.0 and 4.0 events would neeed to enable the expensive NGEN events to +// deal properly with 3.5, even though those events aren't necessary on 4.0. +// +// On CoreCLR, this keyword is a no-op, because coregen PDBs don't exist (and thus we'll +// need the NGEN rundown to still work on Silverligth). +// +// Return Value: +// nonzero iff NGenKeyword is enabled on the runtime provider and +// OverrideAndSuppressNGenEventsKeyword is not enabled on the runtime provider. +// + +BOOL IsRuntimeNgenKeywordEnabledAndNotSuppressed() +{ + LIMITED_METHOD_CONTRACT; + + return + ( + ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_NGEN_KEYWORD) + && ! 
( ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_OVERRIDEANDSUPPRESSNGENEVENTS_KEYWORD) ) + ); +} + +// Same as above, but for the rundown provider +BOOL IsRundownNgenKeywordEnabledAndNotSuppressed() +{ + LIMITED_METHOD_CONTRACT; + + return + ( + ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNNGEN_KEYWORD) + && ! ( ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNOVERRIDEANDSUPPRESSNGENEVENTS_KEYWORD) ) + ); +} + +/*******************************************************/ +/* Fast assembly function to get the topmost EBP frame */ +/*******************************************************/ +#if defined(TARGET_X86) +extern "C" +{ + CallStackFrame* GetEbp() + { + CallStackFrame *frame=NULL; + __asm + { + mov frame, ebp + } + return frame; + } +} +#endif //TARGET_X86 + +#ifndef FEATURE_PAL + +/*************************************/ +/* Function to append a frame to an existing stack */ +/*************************************/ +void ETW::SamplingLog::Append(SIZE_T currentFrame) +{ + LIMITED_METHOD_CONTRACT; + if(m_FrameCount < (ETW::SamplingLog::s_MaxStackSize-1) && + currentFrame != 0) + { + m_EBPStack[m_FrameCount] = currentFrame; + m_FrameCount++; + } +}; + +/********************************************************/ +/* Function to get the callstack on the current thread */ +/********************************************************/ +ETW::SamplingLog::EtwStackWalkStatus ETW::SamplingLog::GetCurrentThreadsCallStack(UINT32 *frameCount, PVOID **Stack) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + SO_TOLERANT; + } + CONTRACTL_END; + + // The stack walk performed below can cause allocations (thus entering the host). But + // this is acceptable, since we're not supporting the use of SQL/F1 profiling and + // full-blown ETW CLR stacks (which would be redundant). 
+ PERMANENT_CONTRACT_VIOLATION(HostViolation, ReasonUnsupportedForSQLF1Profiling); + + m_FrameCount = 0; + ETW::SamplingLog::EtwStackWalkStatus stackwalkStatus = SaveCurrentStack(); + + _ASSERTE(m_FrameCount < ETW::SamplingLog::s_MaxStackSize); + + // this not really needed, but let's do it + // because we use the framecount while dumping the stack event + for(int i=m_FrameCount; im_State & Thread::TS_Hijacked) { + return ETW::SamplingLog::UnInitialized; + } + + if (pThread->IsEtwStackWalkInProgress()) + { + return ETW::SamplingLog::InProgress; + } + pThread->MarkEtwStackWalkInProgress(); + EX_TRY + { +#ifdef TARGET_X86 + CallStackFrame *currentEBP = GetEbp(); + CallStackFrame *lastEBP = NULL; + while(currentEBP) + { + lastEBP = currentEBP; + currentEBP = currentEBP->m_Next; + + // Skip the top N frames + if(skipTopNFrames) { + skipTopNFrames--; + continue; + } + + // Save the Return Address for symbol decoding + Append(lastEBP->m_ReturnAddress); + + // Check for stack limits + if((SIZE_T)currentEBP < (SIZE_T)Thread::GetStackLowerBound() || (SIZE_T)currentEBP > (SIZE_T)Thread::GetStackUpperBound()) + { + break; + } + + // If we have a too small address, we are probably bad + if((SIZE_T)currentEBP < (SIZE_T)0x10000) + break; + + if((SIZE_T)currentEBP < (SIZE_T)lastEBP) + { + break; + } + } +#else + CONTEXT ctx; + ClrCaptureContext(&ctx); + UINT_PTR ControlPc = 0; + UINT_PTR CurrentSP = 0, PrevSP = 0; + + while(1) + { + // Unwind to the caller + ControlPc = Thread::VirtualUnwindCallFrame(&ctx); + + // This is to take care of recursion + CurrentSP = (UINT_PTR)GetSP(&ctx); + + // when to break from this loop + if ( ControlPc == 0 || ( PrevSP == CurrentSP ) ) + { + break; + } + + // Skip the top N frames + if ( skipTopNFrames ) { + skipTopNFrames--; + continue; + } + + // Add the stack frame to the list + Append(ControlPc); + + PrevSP = CurrentSP; + } +#endif //TARGET_X86 + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); + pThread->MarkEtwStackWalkCompleted(); +#endif //!DACCESS_COMPILE + + return ETW::SamplingLog::Completed; +} +#endif //!FEATURE_PAL + +#endif // !FEATURE_REDHAWK + + +#if defined(FEATURE_REDHAWK) || !defined(FEATURE_PAL) || defined(FEATURE_DTRACE) + +/****************************************************************************/ +/* Methods that are called from the runtime */ +/****************************************************************************/ + +#ifndef FEATURE_DTRACE +/****************************************************************************/ +/* Methods for rundown events */ +/* Since DTRACe does not support passing a method pointer as a callback when*/ +/* enable a events, rundown events are not supported on Mac */ +/****************************************************************************/ + +/***************************************************************************/ +/* This function should be called from the event tracing callback routine + when the private CLR provider is enabled */ +/***************************************************************************/ + +#ifndef FEATURE_REDHAWK + +void ETW::GCLog::GCSettingsEvent() +{ + if (GCHeapUtilities::IsGCHeapInitialized()) + { + if (ETW_TRACING_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, + GCSettings)) + { + ETW::GCLog::ETW_GC_INFO Info; + + Info.GCSettings.ServerGC = GCHeapUtilities::IsServerHeap (); + Info.GCSettings.SegmentSize = GCHeapUtilities::GetGCHeap()->GetValidSegmentSize (FALSE); + Info.GCSettings.LargeObjectSegmentSize = 
GCHeapUtilities::GetGCHeap()->GetValidSegmentSize (TRUE); + FireEtwGCSettings_V1(Info.GCSettings.SegmentSize, Info.GCSettings.LargeObjectSegmentSize, Info.GCSettings.ServerGC, GetClrInstanceId()); + } + GCHeapUtilities::GetGCHeap()->TraceGCSegments(); + } +}; + +#endif // !FEATURE_REDHAWK + + +//--------------------------------------------------------------------------------------- +// Code for sending GC heap object events is generally the same for both FEATURE_REDHAWK +// and !FEATURE_REDHAWK builds +//--------------------------------------------------------------------------------------- + + +// Simple helpers called by the GC to decide whether it needs to do a walk of heap +// objects and / or roots. + +BOOL ETW::GCLog::ShouldWalkHeapObjectsForEtw() +{ + LIMITED_METHOD_CONTRACT; + return ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GCHEAPDUMP_KEYWORD); +} + +BOOL ETW::GCLog::ShouldWalkHeapRootsForEtw() +{ + LIMITED_METHOD_CONTRACT; + return ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GCHEAPDUMP_KEYWORD); +} + +BOOL ETW::GCLog::ShouldTrackMovementForEtw() +{ + LIMITED_METHOD_CONTRACT; + return ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GCHEAPSURVIVALANDMOVEMENT_KEYWORD); +} + +BOOL ETW::GCLog::ShouldWalkStaticsAndCOMForEtw() +{ + // @TODO: + return FALSE; +} + +void ETW::GCLog::WalkStaticsAndCOMForETW() +{ + // @TODO: +} + + +// Batches the list of moved/surviving references for the GCBulkMovedObjectRanges / +// GCBulkSurvivingObjectRanges events +struct EtwGcMovementContext +{ +public: + // An instance of EtwGcMovementContext is dynamically allocated and stored + // inside of MovedReferenceContextForEtwAndProfapi, which in turn is dynamically + // allocated and pointed to by a profiling_context pointer created by the GC on the stack. + // This is used to batch and send GCBulkSurvivingObjectRanges events and + // GCBulkMovedObjectRanges events. This method is passed a pointer to + // MovedReferenceContextForEtwAndProfapi::pctxEtw; if non-NULL it gets returned; + // else, a new EtwGcMovementContext is allocated, stored in that pointer, and + // then returned. Callers should test for NULL, which can be returned if out of + // memory + static EtwGcMovementContext * GetOrCreateInGCContext(EtwGcMovementContext ** ppContext) + { + LIMITED_METHOD_CONTRACT; + + _ASSERTE(ppContext != NULL); + + EtwGcMovementContext * pContext = *ppContext; + if (pContext == NULL) + { + pContext = new (nothrow) EtwGcMovementContext; + *ppContext = pContext; + } + return pContext; + } + + EtwGcMovementContext() : + iCurBulkSurvivingObjectRanges(0), + iCurBulkMovedObjectRanges(0) + { + LIMITED_METHOD_CONTRACT; + Clear(); + } + + // Resets structure for reuse on construction, and after each flush. + // (Intentionally leave iCurBulk* as is, since they persist across flushes within a GC.) 
+ void Clear() + { + LIMITED_METHOD_CONTRACT; + cBulkSurvivingObjectRanges = 0; + cBulkMovedObjectRanges = 0; + ZeroMemory(rgGCBulkSurvivingObjectRanges, sizeof(rgGCBulkSurvivingObjectRanges)); + ZeroMemory(rgGCBulkMovedObjectRanges, sizeof(rgGCBulkMovedObjectRanges)); + } + + //--------------------------------------------------------------------------------------- + // GCBulkSurvivingObjectRanges + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkSurvivingObjectRanges event + UINT iCurBulkSurvivingObjectRanges; + + // Number of surviving object ranges currently filled out in rgGCBulkSurvivingObjectRanges array + UINT cBulkSurvivingObjectRanges; + + // Struct array containing the primary data for each GCBulkSurvivingObjectRanges + // event. Fix the size so the total event stays well below the 64K limit (leaving + // lots of room for non-struct fields that come before the values data) + EventStructGCBulkSurvivingObjectRangesValue rgGCBulkSurvivingObjectRanges[ + (cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkSurvivingObjectRangesValue)]; + + //--------------------------------------------------------------------------------------- + // GCBulkMovedObjectRanges + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkMovedObjectRanges event + UINT iCurBulkMovedObjectRanges; + + // Number of Moved object ranges currently filled out in rgGCBulkMovedObjectRanges array + UINT cBulkMovedObjectRanges; + + // Struct array containing the primary data for each GCBulkMovedObjectRanges + // event. Fix the size so the total event stays well below the 64K limit (leaving + // lots of room for non-struct fields that come before the values data) + EventStructGCBulkMovedObjectRangesValue rgGCBulkMovedObjectRanges[ + (cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkMovedObjectRangesValue)]; +}; + +// Contains above struct for ETW, plus extra info (opaque to us) used by the profiling +// API to track its own information. +struct MovedReferenceContextForEtwAndProfapi +{ + // An instance of MovedReferenceContextForEtwAndProfapi is dynamically allocated and + // pointed to by a profiling_context pointer created by the GC on the stack. This is used to + // batch and send GCBulkSurvivingObjectRanges events and GCBulkMovedObjectRanges + // events and the corresponding callbacks for profapi profilers. This method is + // passed a pointer to a MovedReferenceContextForEtwAndProfapi; if non-NULL it gets + // returned; else, a new MovedReferenceContextForEtwAndProfapi is allocated, stored + // in that pointer, and then returned. Callers should test for NULL, which can be + // returned if out of memory + static MovedReferenceContextForEtwAndProfapi * CreateInGCContext(LPVOID pvContext) + { + LIMITED_METHOD_CONTRACT; + + _ASSERTE(pvContext != NULL); + + MovedReferenceContextForEtwAndProfapi * pContext = *(MovedReferenceContextForEtwAndProfapi **) pvContext; + + // Shouldn't be called if the context was already created. Perhaps someone made + // one too many BeginMovedReferences calls, or didn't have an EndMovedReferences + // in between? 
+ _ASSERTE(pContext == NULL); + + pContext = new (nothrow) MovedReferenceContextForEtwAndProfapi; + *(MovedReferenceContextForEtwAndProfapi **) pvContext = pContext; + + return pContext; + } + + + MovedReferenceContextForEtwAndProfapi() : + pctxProfAPI(NULL), + pctxEtw(NULL) + + { + LIMITED_METHOD_CONTRACT; + } + + LPVOID pctxProfAPI; + EtwGcMovementContext * pctxEtw; +}; + + +//--------------------------------------------------------------------------------------- +// +// Called by the GC for each moved or surviving reference that it encounters. This +// batches the info into our context's buffer, and flushes that buffer to ETW as it fills +// up. +// +// Arguments: +// * pbMemBlockStart - Start of moved/surviving block +// * pbMemBlockEnd - Next pointer after end of moved/surviving block +// * cbRelocDistance - How far did the block move? (0 for non-compacted / surviving +// references; negative if moved to earlier addresses) +// * profilingContext - Where our context is stored +// * fCompacting - Is this a compacting GC? Used to decide whether to send the moved +// or surviving event +// + +// static +void ETW::GCLog::MovedReference( + BYTE * pbMemBlockStart, + BYTE * pbMemBlockEnd, + ptrdiff_t cbRelocDistance, + size_t profilingContext, + BOOL fCompacting, + BOOL /*fAllowProfApiNotification*/) // @TODO: unused param from newer implementation +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; // EEToProfInterfaceImpl::AllocateMovedReferencesData takes lock + } + CONTRACTL_END; + + MovedReferenceContextForEtwAndProfapi * pCtxForEtwAndProfapi = + (MovedReferenceContextForEtwAndProfapi *) profilingContext; + if (pCtxForEtwAndProfapi == NULL) + { + _ASSERTE(!"MovedReference() encountered a NULL profilingContext"); + return; + } + +#ifdef PROFILING_SUPPORTED + // ProfAPI + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + g_profControlBlock.pProfInterface->MovedReference(pbMemBlockStart, + pbMemBlockEnd, + cbRelocDistance, + &(pCtxForEtwAndProfapi->pctxProfAPI), + fCompacting); + END_PIN_PROFILER(); + } +#endif // PROFILING_SUPPORTED + + // ETW + + if (!ShouldTrackMovementForEtw()) + return; + + EtwGcMovementContext * pContext = + EtwGcMovementContext::GetOrCreateInGCContext(&pCtxForEtwAndProfapi->pctxEtw); + if (pContext == NULL) + return; + + if (fCompacting) + { + // Moved references + + _ASSERTE(pContext->cBulkMovedObjectRanges < _countof(pContext->rgGCBulkMovedObjectRanges)); + EventStructGCBulkMovedObjectRangesValue * pValue = + &pContext->rgGCBulkMovedObjectRanges[pContext->cBulkMovedObjectRanges]; + pValue->OldRangeBase = pbMemBlockStart; + pValue->NewRangeBase = pbMemBlockStart + cbRelocDistance; + pValue->RangeLength = pbMemBlockEnd - pbMemBlockStart; + pContext->cBulkMovedObjectRanges++; + + // If buffer is now full, empty it into ETW + if (pContext->cBulkMovedObjectRanges == _countof(pContext->rgGCBulkMovedObjectRanges)) + { + FireEtwGCBulkMovedObjectRanges( + pContext->iCurBulkMovedObjectRanges, + pContext->cBulkMovedObjectRanges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkMovedObjectRanges[0]), + &pContext->rgGCBulkMovedObjectRanges[0]); + + pContext->iCurBulkMovedObjectRanges++; + pContext->Clear(); + } + } + else + { + // Surviving references + + _ASSERTE(pContext->cBulkSurvivingObjectRanges < _countof(pContext->rgGCBulkSurvivingObjectRanges)); + EventStructGCBulkSurvivingObjectRangesValue * pValue = + &pContext->rgGCBulkSurvivingObjectRanges[pContext->cBulkSurvivingObjectRanges]; + pValue->RangeBase = 
pbMemBlockStart; + pValue->RangeLength = pbMemBlockEnd - pbMemBlockStart; + pContext->cBulkSurvivingObjectRanges++; + + // If buffer is now full, empty it into ETW + if (pContext->cBulkSurvivingObjectRanges == _countof(pContext->rgGCBulkSurvivingObjectRanges)) + { + FireEtwGCBulkSurvivingObjectRanges( + pContext->iCurBulkSurvivingObjectRanges, + pContext->cBulkSurvivingObjectRanges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkSurvivingObjectRanges[0]), + &pContext->rgGCBulkSurvivingObjectRanges[0]); + + pContext->iCurBulkSurvivingObjectRanges++; + pContext->Clear(); + } + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + + +//--------------------------------------------------------------------------------------- +// +// Called by the GC just before it begins enumerating plugs. Gives us a chance to +// allocate our context structure, to allow us to batch plugs before firing events +// for them +// +// Arguments: +// * pProfilingContext - Points to location on stack (in GC function) where we can +// store a pointer to the context we allocate +// + +// static +void ETW::GCLog::BeginMovedReferences(size_t * pProfilingContext) +{ + LIMITED_METHOD_CONTRACT; + + MovedReferenceContextForEtwAndProfapi::CreateInGCContext(LPVOID(pProfilingContext)); +} + + +//--------------------------------------------------------------------------------------- +// +// Called by the GC at the end of a heap walk to give us a place to flush any remaining +// buffers of data to ETW or the profapi profiler +// +// Arguments: +// profilingContext - Our context we built up during the heap walk +// + +// static +void ETW::GCLog::EndMovedReferences(size_t profilingContext, + BOOL /*fAllowProfApiNotification*/) // @TODO: unused param from newer implementation +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + MovedReferenceContextForEtwAndProfapi * pCtxForEtwAndProfapi = (MovedReferenceContextForEtwAndProfapi *) profilingContext; + if (pCtxForEtwAndProfapi == NULL) + { + _ASSERTE(!"EndMovedReferences() encountered a NULL profilingContext"); + return; + } + +#ifdef PROFILING_SUPPORTED + // ProfAPI + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + g_profControlBlock.pProfInterface->EndMovedReferences(&(pCtxForEtwAndProfapi->pctxProfAPI)); + END_PIN_PROFILER(); + } +#endif //PROFILING_SUPPORTED + + // ETW + + if (!ShouldTrackMovementForEtw()) + return; + + // If context isn't already set up for us, then we haven't been collecting any data + // for ETW events. 
+ EtwGcMovementContext * pContext = pCtxForEtwAndProfapi->pctxEtw; + if (pContext == NULL) + return; + + // Flush any remaining moved or surviving range data + + if (pContext->cBulkMovedObjectRanges > 0) + { + FireEtwGCBulkMovedObjectRanges( + pContext->iCurBulkMovedObjectRanges, + pContext->cBulkMovedObjectRanges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkMovedObjectRanges[0]), + &pContext->rgGCBulkMovedObjectRanges[0]); + } + + if (pContext->cBulkSurvivingObjectRanges > 0) + { + FireEtwGCBulkSurvivingObjectRanges( + pContext->iCurBulkSurvivingObjectRanges, + pContext->cBulkSurvivingObjectRanges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkSurvivingObjectRanges[0]), + &pContext->rgGCBulkSurvivingObjectRanges[0]); + } + + pCtxForEtwAndProfapi->pctxEtw = NULL; + delete pContext; +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + +/***************************************************************************/ +/* This implements the public runtime provider's GCHeapCollectKeyword. It + performs a full, gen-2, blocking GC. +/***************************************************************************/ +void ETW::GCLog::ForceGC(LONGLONG l64ClientSequenceNumber) +{ + CONTRACTL + { + NOTHROW; + GC_TRIGGERS; + MODE_ANY; + } + CONTRACTL_END; + +#ifdef FEATURE_REDHAWK + if (!GCHeapUtilities::IsGCHeapInitialized()) + return; + + // No InterlockedExchange64 on Redhawk, even though there is one for + // InterlockedCompareExchange64. Technically, there's a race here by using + // InterlockedCompareExchange64, but it's not worth addressing. The race would be + // between two ETW controllers trying to trigger GCs simultaneously, in which case + // one will win and get its sequence number to appear in the GCStart event, while the + // other will lose. Rare, uninteresting, and low-impact. + PalInterlockedCompareExchange64(&s_l64LastClientSequenceNumber, l64ClientSequenceNumber, s_l64LastClientSequenceNumber); +#else // !FEATURE_REDHAWK + if (!IsGarbageCollectorFullyInitialized()) + return; + + InterlockedExchange64(&s_l64LastClientSequenceNumber, l64ClientSequenceNumber); +#endif // FEATURE_REDHAWK + + ForceGCForDiagnostics(); +} + +//--------------------------------------------------------------------------------------- +// +// Helper to fire the GCStart event. Figures out which version of GCStart to fire, and +// includes the client sequence number, if available. 
+// +// Arguments: +// pGcInfo - ETW_GC_INFO containing details from GC about this collection +// + +// static +void ETW::GCLog::FireGcStart(ETW_GC_INFO * pGcInfo) +{ + LIMITED_METHOD_CONTRACT; + +#if !defined(FEATURE_PAL) || defined(FEATURE_DTRACE) + + if (ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GC_KEYWORD)) + { +#if !defined(FEATURE_PAL) + // If the controller specified a client sequence number for us to log with this + // GCStart, then retrieve it + LONGLONG l64ClientSequenceNumberToLog = 0; + if ((s_l64LastClientSequenceNumber != 0) && + (pGcInfo->GCStart.Depth == GCHeapUtilities::GetGCHeap()->GetMaxGeneration()) && + (pGcInfo->GCStart.Reason == ETW_GC_INFO::GC_INDUCED)) + { +#ifdef FEATURE_REDHAWK + // No InterlockedExchange64 on Redhawk (presumably b/c there is no compiler + // intrinsic for this on x86, even though there is one for InterlockedCompareExchange64) + l64ClientSequenceNumberToLog = PalInterlockedCompareExchange64(&s_l64LastClientSequenceNumber, 0, s_l64LastClientSequenceNumber); +#else + l64ClientSequenceNumberToLog = InterlockedExchange64(&s_l64LastClientSequenceNumber, 0); +#endif + } + + FireEtwGCStart_V2(pGcInfo->GCStart.Count, pGcInfo->GCStart.Depth, pGcInfo->GCStart.Reason, pGcInfo->GCStart.Type, GetClrInstanceId(), l64ClientSequenceNumberToLog); + +#elif defined(FEATURE_DTRACE) + FireEtwGCStart(pGcInfo->GCStart.Count,pGcInfo->GCStart.Reason); +#endif + } + +#endif // defined(FEATURE_PAL) || defined(FEATURE_DTRACE) +} + +//--------------------------------------------------------------------------------------- +// +// Contains code common to profapi and ETW scenarios where the profiler wants to force +// the CLR to perform a GC. The important work here is to create a managed thread for +// the current thread BEFORE the GC begins. On both ETW and profapi threads, there may +// not yet be a managed thread object. But some scenarios require a managed thread +// object be present (notably if we need to call into Jupiter during the GC). +// +// Return Value: +// HRESULT indicating success or failure +// +// Assumptions: +// Caller should ensure that the EE has fully started up and that the GC heap is +// initialized enough to actually perform a GC +// + +// static +HRESULT ETW::GCLog::ForceGCForDiagnostics() +{ + CONTRACTL + { + NOTHROW; + GC_TRIGGERS; + MODE_ANY; + } + CONTRACTL_END; + + HRESULT hr = E_FAIL; + +#ifndef FEATURE_REDHAWK + // Caller should ensure we're past startup. + _ASSERTE(IsGarbageCollectorFullyInitialized()); + + // In immersive apps the GarbageCollect() call below will call into Jupiter, + // which will call back into the runtime to track references. This call + // chain would cause a Thread object to be created for this thread while code + // higher on the stack owns the ThreadStoreLock. This will lead to asserts + // since the ThreadStoreLock is non-reentrant. To avoid this we'll create + // the Thread object here instead. + if (GetThreadNULLOk() == NULL) + { + HRESULT hr = E_FAIL; + SetupThreadNoThrow(&hr); + if (FAILED(hr)) + return hr; + } + + ASSERT_NO_EE_LOCKS_HELD(); + + EX_TRY + { + // Need to switch to cooperative mode as the thread will access managed + // references (through Jupiter callbacks). + GCX_COOP(); + +#else // FEATURE_REDHAWK + _ASSERTE(GCHeapUtilities::IsGCHeapInitialized()); + + ThreadStore::AttachCurrentThread(); + Thread * pThread = ThreadStore::GetCurrentThread(); + + // Doing this prevents the GC from trying to walk this thread's stack for roots. 
+ pThread->SetGCSpecial(true); + + // While doing the GC, much code assumes & asserts the thread doing the GC is in + // cooperative mode. + pThread->DisablePreemptiveMode(); +#endif // FEATURE_REDHAWK + + hr = GCHeapUtilities::GetGCHeap()->GarbageCollect( + -1, // all generations should be collected + FALSE, // low_memory_p + collection_blocking); + +#ifdef FEATURE_REDHAWK + // In case this thread (generated by the ETW OS APIs) hangs around a while, + // better stick it back into preemptive mode, so it doesn't block any other GCs + pThread->EnablePreemptiveMode(); +#else // !FEATURE_REDHAWK + } + EX_CATCH { } + EX_END_CATCH(RethrowCorruptingExceptions); +#endif // FEATURE_REDHAWK + + return hr; +} + + +//--------------------------------------------------------------------------------------- +// BulkTypeValue / BulkTypeEventLogger: These take care of batching up types so they can +// be logged via ETW in bulk +//--------------------------------------------------------------------------------------- + +BulkTypeValue::BulkTypeValue() : cTypeParameters(0), rgTypeParameters() +#ifdef FEATURE_REDHAWK +, ullSingleTypeParameter(0) +#else // FEATURE_REDHAWK +, sName() +#endif // FEATURE_REDHAWK +{ + LIMITED_METHOD_CONTRACT; + ZeroMemory(&fixedSizedData, sizeof(fixedSizedData)); +} + +//--------------------------------------------------------------------------------------- +// +// Clears a BulkTypeValue so it can be reused after the buffer is flushed to ETW +// + +void BulkTypeValue::Clear() +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + ZeroMemory(&fixedSizedData, sizeof(fixedSizedData)); + cTypeParameters = 0; +#ifdef FEATURE_REDHAWK + ullSingleTypeParameter = 0; + rgTypeParameters.Release(); +#else // FEATURE_REDHAWK + sName.Clear(); + rgTypeParameters.Clear(); +#endif // FEATURE_REDHAWK +} + +//--------------------------------------------------------------------------------------- +// +// Fire an ETW event for all the types we batched so far, and then reset our state +// so we can start batching new types at the beginning of the array. +// +// + +void BulkTypeEventLogger::FireBulkTypeEvent() +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + LIMITED_METHOD_CONTRACT; + + if (m_nBulkTypeValueCount == 0) + { + // No types were batched up, so nothing to send + return; + } + + // Normally, we'd use the MC-generated FireEtwBulkType for all this gunk, but + // it's insufficient as the bulk type event is too complex (arrays of structs of + // varying size). So we directly log the event via EventDataDescCreate and + // EventWrite + + // We use one descriptor for the count + one for the ClrInstanceID + 4 + // per batched type (to include fixed-size data + name + param count + param + // array). But the system limit of 128 descriptors per event kicks in way + // before the 64K event size limit, and we already limit our batch size + // (m_nBulkTypeValueCount) to stay within the 128 descriptor limit. 
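A hypothetical back-of-the-envelope sketch of the descriptor budget described above, assuming one descriptor for the count, one for the ClrInstanceID, and four per batched type (the constant names here are illustrative only):

constexpr unsigned kMaxDescriptorsPerEventSketch = 128; // ETW descriptor limit per event
constexpr unsigned kDescriptorsPerTypeSketch     = 4;   // fixed data + name + param count + param array
constexpr unsigned kMaxTypesPerBatchSketch =
    (kMaxDescriptorsPerEventSketch - 2) / kDescriptorsPerTypeSketch;  // (128 - 2) / 4 == 31
static_assert(kMaxTypesPerBatchSketch == 31, "descriptor budget caps each BulkType batch");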
+ EVENT_DATA_DESCRIPTOR EventData[128]; + UINT16 nClrInstanceID = GetClrInstanceId(); + + UINT iDesc = 0; + + _ASSERTE(iDesc < _countof(EventData)); + EventDataDescCreate(&EventData[iDesc++], &m_nBulkTypeValueCount, sizeof(m_nBulkTypeValueCount)); + + _ASSERTE(iDesc < _countof(EventData)); + EventDataDescCreate(&EventData[iDesc++], &nClrInstanceID, sizeof(nClrInstanceID)); + + for (int iTypeData = 0; iTypeData < m_nBulkTypeValueCount; iTypeData++) + { + // Do fixed-size data as one bulk copy + _ASSERTE(iDesc < _countof(EventData)); + EventDataDescCreate( + &EventData[iDesc++], + &(m_rgBulkTypeValues[iTypeData].fixedSizedData), + sizeof(m_rgBulkTypeValues[iTypeData].fixedSizedData)); + + // Do var-sized data individually per field + + // Type name (nonexistent and thus empty on FEATURE_REDHAWK) + _ASSERTE(iDesc < _countof(EventData)); +#ifdef FEATURE_REDHAWK + EventDataDescCreate(&EventData[iDesc++], L"", sizeof(WCHAR)); +#else // FEATURE_REDHAWK + LPCWSTR wszName = m_rgBulkTypeValues[iTypeData].sName.GetUnicode(); + EventDataDescCreate( + &EventData[iDesc++], + (wszName == NULL) ? L"" : wszName, + (wszName == NULL) ? sizeof(WCHAR) : (m_rgBulkTypeValues[iTypeData].sName.GetCount() + 1) * sizeof(WCHAR)); +#endif // FEATURE_REDHAWK + + // Type parameter count +#ifndef FEATURE_REDHAWK + m_rgBulkTypeValues[iTypeData].cTypeParameters = m_rgBulkTypeValues[iTypeData].rgTypeParameters.GetCount(); +#endif // FEATURE_REDHAWK + _ASSERTE(iDesc < _countof(EventData)); + EventDataDescCreate( + &EventData[iDesc++], + &(m_rgBulkTypeValues[iTypeData].cTypeParameters), + sizeof(m_rgBulkTypeValues[iTypeData].cTypeParameters)); + + // Type parameter array + if (m_rgBulkTypeValues[iTypeData].cTypeParameters > 0) + { + _ASSERTE(iDesc < _countof(EventData)); + EventDataDescCreate( + &EventData[iDesc++], +#ifdef FEATURE_REDHAWK + ((m_rgBulkTypeValues[iTypeData].cTypeParameters == 1) ? + &(m_rgBulkTypeValues[iTypeData].ullSingleTypeParameter) : + (ULONGLONG *) (m_rgBulkTypeValues[iTypeData].rgTypeParameters)), +#else + m_rgBulkTypeValues[iTypeData].rgTypeParameters.GetElements(), +#endif + sizeof(ULONGLONG) * m_rgBulkTypeValues[iTypeData].cTypeParameters); + } + } + + Win32EventWrite(Microsoft_Windows_DotNETRuntimeHandle, &BulkType, iDesc, EventData); + + // Reset state + m_nBulkTypeValueCount = 0; + m_nBulkTypeValueByteCount = 0; +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + +#ifndef FEATURE_REDHAWK + +//--------------------------------------------------------------------------------------- +// +// Batches a single type into the array, flushing the array to ETW if it fills up. Most +// interaction with the type system (to analyze the type) is done here. This does not +// recursively batch up any parameter types (for arrays or generics), but does add their +// TypeHandles to the rgTypeParameters array. LogTypeAndParameters is responsible for +// initiating any recursive calls to deal with type parameters. +// +// Arguments: +// th - TypeHandle to batch +// +// Return Value: +// Index into array of where this type got batched. -1 if there was a failure. 
+// + +int BulkTypeEventLogger::LogSingleType(TypeHandle th) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; // some of the type system stuff can take locks + } + CONTRACTL_END; + + // If there's no room for another type, flush what we've got + if (m_nBulkTypeValueCount == _countof(m_rgBulkTypeValues)) + { + FireBulkTypeEvent(); + } + + _ASSERTE(m_nBulkTypeValueCount < _countof(m_rgBulkTypeValues)); + + if (!th.IsTypeDesc() && th.GetMethodTable()->IsArray()) + { + _ASSERTE(!"BulkTypeEventLogger::LogSingleType called with MethodTable array"); + return -1; + } + + BulkTypeValue * pVal = &m_rgBulkTypeValues[m_nBulkTypeValueCount]; + + // Clear out pVal before filling it out (array elements can get reused if there + // are enough types that we need to flush to multiple events). Clearing the + // contained SBuffer can throw, so deal with exceptions + BOOL fSucceeded = FALSE; + EX_TRY + { + pVal->Clear(); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + return -1; + + pVal->fixedSizedData.TypeID = (ULONGLONG) th.AsTAddr(); + pVal->fixedSizedData.ModuleID = (ULONGLONG) (TADDR) th.GetModule(); + pVal->fixedSizedData.TypeNameID = (th.GetMethodTable() == NULL) ? 0 : th.GetCl(); + pVal->fixedSizedData.Flags = 0; + pVal->fixedSizedData.CorElementType = (BYTE) th.GetInternalCorElementType(); + + if (th.IsArray()) + { + // Normal typedesc array + pVal->fixedSizedData.Flags |= kEtwTypeFlagsArray; + + // Fetch TypeHandle of array elements + fSucceeded = FALSE; + EX_TRY + { + pVal->rgTypeParameters.Append((ULONGLONG) th.AsArray()->GetArrayElementTypeHandle().AsTAddr()); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + return -1; + } + else if (th.IsTypeDesc()) + { + // Non-array Typedescs + PTR_TypeDesc pTypeDesc = th.AsTypeDesc(); + if (pTypeDesc->HasTypeParam()) + { + fSucceeded = FALSE; + EX_TRY + { + pVal->rgTypeParameters.Append((ULONGLONG) pTypeDesc->GetTypeParam().AsTAddr()); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + return -1; + } + } + else + { + // Non-array MethodTable + + PTR_MethodTable pMT = th.AsMethodTable(); + + // Make CorElementType more specific if this is a string MT + if (pMT->IsString()) + { + pVal->fixedSizedData.CorElementType = ELEMENT_TYPE_STRING; + } + else if (pMT->IsObjectClass()) + { + pVal->fixedSizedData.CorElementType = ELEMENT_TYPE_OBJECT; + } + + // Generic arguments + DWORD cTypeParameters = pMT->GetNumGenericArgs(); + if (cTypeParameters > 0) + { + Instantiation inst = pMT->GetInstantiation(); + fSucceeded = FALSE; + EX_TRY + { + for (DWORD i=0; i < cTypeParameters; i++) + { + pVal->rgTypeParameters.Append((ULONGLONG) inst[i].AsTAddr()); + } + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + return -1; + } + + if (pMT->HasFinalizer()) + { + pVal->fixedSizedData.Flags |= kEtwTypeFlagsFinalizable; + } + if (pMT->IsDelegate()) + { + pVal->fixedSizedData.Flags |= kEtwTypeFlagsDelegate; + } + if (pMT->IsComObjectType()) + { + pVal->fixedSizedData.Flags |= kEtwTypeFlagsExternallyImplementedCOMObject; + } + } + + // If the profiler wants it, construct a name. 
Always normalize the string (even if + // type names are not requested) so that calls to sName.GetCount() can't throw + EX_TRY + { + if (ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GCHEAPANDTYPENAMES_KEYWORD)) + { + th.GetName(pVal->sName); + } + pVal->sName.Normalize(); + } + EX_CATCH + { + // If this failed, the name remains empty, which is ok; the event just + // won't have a name in it. + pVal->sName.Clear(); + } + EX_END_CATCH(RethrowCorruptingExceptions); + + // Now that we know the full size of this type's data, see if it fits in our + // batch or whether we need to flush + + int cbVal = pVal->GetByteCountInEvent(); + if (cbVal > kMaxBytesTypeValues) + { + // This type is apparently so huge, it's too big to squeeze into an event, even + // if it were the only type batched in the whole event. Bail + _ASSERTE(!"Type too big to log via ETW"); + return -1; + } + + if (m_nBulkTypeValueByteCount + cbVal > kMaxBytesTypeValues) + { + // Although this type fits into the array, its size is so big that the entire + // array can't be logged via ETW. So flush the array, and start over by + // calling ourselves--this refetches the type info and puts it at the + // beginning of the array. Since we know this type is small enough to be + // batched into an event on its own, this recursive call will not try to + // call itself again. + FireBulkTypeEvent(); + return LogSingleType(th); + } + + // The type fits into the batch, so update our state + m_nBulkTypeValueCount++; + m_nBulkTypeValueByteCount += cbVal; + return m_nBulkTypeValueCount - 1; // Index of type we just added +} + +void BulkTypeEventLogger::Cleanup() {} + +//--------------------------------------------------------------------------------------- +// +// High-level method to batch a type and (recursively) its type parameters, flushing to +// ETW as needed. This is called by (static) +// ETW::TypeSystemLog::LogTypeAndParametersIfNecessary, which is what clients use to log +// type events +// +// Arguments: +// * thAsAddr - Type to batch +// * typeLogBehavior - Reminder of whether the type system log lock is held +// (useful if we need to recurively call back into TypeSystemLog), and whether +// we even care to check if the type was already logged +// + +void BulkTypeEventLogger::LogTypeAndParameters(ULONGLONG thAsAddr, ETW::TypeSystemLog::TypeLogBehavior typeLogBehavior) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; // LogSingleType can take locks + } + CONTRACTL_END; + + TypeHandle th = TypeHandle::FromTAddr((TADDR) thAsAddr); + + // Batch up this type. 
This grabs useful info about the type, including any + // type parameters it may have, and sticks it in m_rgBulkTypeValues + int iBulkTypeEventData = LogSingleType(th); + if (iBulkTypeEventData == -1) + { + // There was a failure trying to log the type, so don't bother with its type + // parameters + return; + } + + // Look at the type info we just batched, so we can get the type parameters + BulkTypeValue * pVal = &m_rgBulkTypeValues[iBulkTypeEventData]; + + // We're about to recursively call ourselves for the type parameters, so make a + // local copy of their type handles first (else, as we log them we could flush + // and clear out m_rgBulkTypeValues, thus trashing pVal) + + StackSArray rgTypeParameters; + DWORD cParams = pVal->rgTypeParameters.GetCount(); + + BOOL fSucceeded = FALSE; + EX_TRY + { + for (COUNT_T i = 0; i < cParams; i++) + { + rgTypeParameters.Append(pVal->rgTypeParameters[i]); + } + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + return; + + // Before we recurse, adjust the special-cased type-log behavior that allows a + // top-level type to be logged without lookup, but still requires lookups to avoid + // dupes of type parameters + if (typeLogBehavior == ETW::TypeSystemLog::kTypeLogBehaviorAssumeLockAndAlwaysLogTopLevelType) + typeLogBehavior = ETW::TypeSystemLog::kTypeLogBehaviorAssumeLockAndLogIfFirstTime; + + // Recursively log any referenced parameter types + for (COUNT_T i=0; i < cParams; i++) + { + ETW::TypeSystemLog::LogTypeAndParametersIfNecessary(this, rgTypeParameters[i], typeLogBehavior); + } +} + +#endif // FEATURE_REDHAWK + +// Holds state that batches of roots, nodes, edges, and types as the GC walks the heap +// at the end of a collection. +class EtwGcHeapDumpContext +{ +public: + // An instance of EtwGcHeapDumpContext is dynamically allocated and stored inside of + // ProfilingScanContext and ProfilerWalkHeapContext, which are context structures + // that the GC heap walker sends back to the callbacks. This method is passed a + // pointer to ProfilingScanContext::pvEtwContext or + // ProfilerWalkHeapContext::pvEtwContext; if non-NULL it gets returned; else, a new + // EtwGcHeapDumpContext is allocated, stored in that pointer, and then returned. + // Callers should test for NULL, which can be returned if out of memory + static EtwGcHeapDumpContext * GetOrCreateInGCContext(LPVOID * ppvEtwContext) + { + LIMITED_METHOD_CONTRACT; + + _ASSERTE(ppvEtwContext != NULL); + + EtwGcHeapDumpContext * pContext = (EtwGcHeapDumpContext *) *ppvEtwContext; + if (pContext == NULL) + { + pContext = new (nothrow) EtwGcHeapDumpContext; + *ppvEtwContext = pContext; + } + return pContext; + } + + EtwGcHeapDumpContext() : + iCurBulkRootEdge(0), + iCurBulkRootConditionalWeakTableElementEdge(0), + iCurBulkNodeEvent(0), + iCurBulkEdgeEvent(0), + bulkTypeEventLogger() + { + LIMITED_METHOD_CONTRACT; + ClearRootEdges(); + ClearRootConditionalWeakTableElementEdges(); + ClearNodes(); + ClearEdges(); + } + + // These helpers clear the individual buffers, for use after a flush and on + // construction. 
They intentionally leave the indices (iCur*) alone, since they + // persist across flushes within a GC + + void ClearRootEdges() + { + LIMITED_METHOD_CONTRACT; + cGcBulkRootEdges = 0; + ZeroMemory(rgGcBulkRootEdges, sizeof(rgGcBulkRootEdges)); + } + + void ClearRootConditionalWeakTableElementEdges() + { + LIMITED_METHOD_CONTRACT; + cGCBulkRootConditionalWeakTableElementEdges = 0; + ZeroMemory(rgGCBulkRootConditionalWeakTableElementEdges, sizeof(rgGCBulkRootConditionalWeakTableElementEdges)); + } + + void ClearNodes() + { + LIMITED_METHOD_CONTRACT; + cGcBulkNodeValues = 0; + ZeroMemory(rgGcBulkNodeValues, sizeof(rgGcBulkNodeValues)); + } + + void ClearEdges() + { + LIMITED_METHOD_CONTRACT; + cGcBulkEdgeValues = 0; + ZeroMemory(rgGcBulkEdgeValues, sizeof(rgGcBulkEdgeValues)); + } + + //--------------------------------------------------------------------------------------- + // GCBulkRootEdge + // + // A "root edge" is the relationship between a source "GCRootID" (i.e., stack + // variable, handle, static, etc.) and the target "RootedNodeAddress" (the managed + // object that gets rooted). + // + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkRootEdge event + UINT iCurBulkRootEdge; + + // Number of root edges currently filled out in rgGcBulkRootEdges array + UINT cGcBulkRootEdges; + + // Struct array containing the primary data for each GCBulkRootEdge event. Fix the size so + // the total event stays well below the 64K + // limit (leaving lots of room for non-struct fields that come before the root edge data) + EventStructGCBulkRootEdgeValue rgGcBulkRootEdges[(cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkRootEdgeValue)]; + + + //--------------------------------------------------------------------------------------- + // GCBulkRootConditionalWeakTableElementEdge + // + // These describe dependent handles, which simulate an edge connecting a key NodeID + // to a value NodeID. + // + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkRootConditionalWeakTableElementEdge event + UINT iCurBulkRootConditionalWeakTableElementEdge; + + // Number of root edges currently filled out in rgGCBulkRootConditionalWeakTableElementEdges array + UINT cGCBulkRootConditionalWeakTableElementEdges; + + // Struct array containing the primary data for each GCBulkRootConditionalWeakTableElementEdge event. Fix the size so + // the total event stays well below the 64K + // limit (leaving lots of room for non-struct fields that come before the root edge data) + EventStructGCBulkRootConditionalWeakTableElementEdgeValue rgGCBulkRootConditionalWeakTableElementEdges + [(cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkRootConditionalWeakTableElementEdgeValue)]; + + //--------------------------------------------------------------------------------------- + // GCBulkNode + // + // A "node" is ANY managed object sitting on the heap, including RootedNodeAddresses + // as well as leaf nodes. + // + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkNode event + UINT iCurBulkNodeEvent; + + // Number of nodes currently filled out in rgGcBulkNodeValues array + UINT cGcBulkNodeValues; + + // Struct array containing the primary data for each GCBulkNode event. 
Fix the size so + // the total event stays well below the 64K + // limit (leaving lots of room for non-struct fields that come before the node data) + EventStructGCBulkNodeValue rgGcBulkNodeValues[(cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkNodeValue)]; + + //--------------------------------------------------------------------------------------- + // GCBulkEdge + // + // An "edge" is the relationship between a source node and its referenced target + // node. Edges are reported in bulk, separately from Nodes, but it is expected that + // the consumer read the Node and Edge streams together. One takes the first node + // from the Node stream, and then reads EdgeCount entries in the Edge stream, telling + // you all of that Node's targets. Then, one takes the next node in the Node stream, + // and reads the next entries in the Edge stream (using this Node's EdgeCount to + // determine how many) to find all of its targets. This continues on until the Node + // and Edge streams have been fully read. + // + // GCBulkRootEdges are not duplicated in the GCBulkEdge events. GCBulkEdge events + // begin at the GCBulkRootEdge.RootedNodeAddress and move forward. + // + //--------------------------------------------------------------------------------------- + + // Sequence number for each GCBulkEdge event + UINT iCurBulkEdgeEvent; + + // Number of nodes currently filled out in rgGcBulkEdgeValues array + UINT cGcBulkEdgeValues; + + // Struct array containing the primary data for each GCBulkEdge event. Fix the size so + // the total event stays well below the 64K + // limit (leaving lots of room for non-struct fields that come before the edge data) + EventStructGCBulkEdgeValue rgGcBulkEdgeValues[(cbMaxEtwEvent - 0x100) / sizeof(EventStructGCBulkEdgeValue)]; + + + //--------------------------------------------------------------------------------------- + // BulkType + // + // Types are a bit more complicated to batch up, since their data is of varying + // size. BulkTypeEventLogger takes care of the pesky details for us + //--------------------------------------------------------------------------------------- + + BulkTypeEventLogger bulkTypeEventLogger; +}; + + + +//--------------------------------------------------------------------------------------- +// +// Called during a heap walk for each root reference encountered. Batches up the root in +// the ETW context +// +// Arguments: +// * pvHandle - If the root is a handle, this points to the handle +// * pRootedNode - Points to object that is rooted +// * pSecondaryNodeForDependentHandle - For dependent handles, this is the +// secondary object +// * fDependentHandle - nonzero iff this is for a dependent handle +// * profilingScanContext - The shared profapi/etw context built up during the heap walk. 
+// * dwGCFlags - Bitmask of "GC_"-style flags set by GC +// * rootFlags - Bitmask of EtwGCRootFlags describing the root +// + +// static +void ETW::GCLog::RootReference( + LPVOID pvHandle, + Object * pRootedNode, + Object * pSecondaryNodeForDependentHandle, + BOOL fDependentHandle, + ProfilingScanContext * profilingScanContext, + DWORD dwGCFlags, + DWORD rootFlags) +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + LIMITED_METHOD_CONTRACT; + + if (pRootedNode == NULL) + return; + + EtwGcHeapDumpContext * pContext = + EtwGcHeapDumpContext::GetOrCreateInGCContext(&profilingScanContext->pvEtwContext); + if (pContext == NULL) + return; + + // Determine root kind, root ID, and handle-specific flags + LPVOID pvRootID = NULL; + BYTE nRootKind = (BYTE) profilingScanContext->dwEtwRootKind; + switch (nRootKind) + { + case kEtwGCRootKindStack: +#ifndef FEATURE_REDHAWK + pvRootID = profilingScanContext->pMD; +#endif // !FEATURE_REDHAWK + break; + + case kEtwGCRootKindHandle: + pvRootID = pvHandle; + break; + + case kEtwGCRootKindFinalizer: + _ASSERTE(pvRootID == NULL); + break; + + case kEtwGCRootKindOther: + default: + _ASSERTE(nRootKind == kEtwGCRootKindOther); + _ASSERTE(pvRootID == NULL); + break; + } + + // Convert GC root flags to ETW root flags + if (dwGCFlags & GC_CALL_INTERIOR) + rootFlags |= kEtwGCRootFlagsInterior; + if (dwGCFlags & GC_CALL_PINNED) + rootFlags |= kEtwGCRootFlagsPinning; + + // Add root edge to appropriate buffer + if (fDependentHandle) + { + _ASSERTE(pContext->cGCBulkRootConditionalWeakTableElementEdges < + _countof(pContext->rgGCBulkRootConditionalWeakTableElementEdges)); + EventStructGCBulkRootConditionalWeakTableElementEdgeValue * pRCWTEEdgeValue = + &pContext->rgGCBulkRootConditionalWeakTableElementEdges[pContext->cGCBulkRootConditionalWeakTableElementEdges]; + pRCWTEEdgeValue->GCKeyNodeID = pRootedNode; + pRCWTEEdgeValue->GCValueNodeID = pSecondaryNodeForDependentHandle; + pRCWTEEdgeValue->GCRootID = pvRootID; + pContext->cGCBulkRootConditionalWeakTableElementEdges++; + + // If RCWTE edge buffer is now full, empty it into ETW + if (pContext->cGCBulkRootConditionalWeakTableElementEdges == + _countof(pContext->rgGCBulkRootConditionalWeakTableElementEdges)) + { + FireEtwGCBulkRootConditionalWeakTableElementEdge( + pContext->iCurBulkRootConditionalWeakTableElementEdge, + pContext->cGCBulkRootConditionalWeakTableElementEdges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkRootConditionalWeakTableElementEdges[0]), + &pContext->rgGCBulkRootConditionalWeakTableElementEdges[0]); + + pContext->iCurBulkRootConditionalWeakTableElementEdge++; + pContext->ClearRootConditionalWeakTableElementEdges(); + } + } + else + { + _ASSERTE(pContext->cGcBulkRootEdges < _countof(pContext->rgGcBulkRootEdges)); + EventStructGCBulkRootEdgeValue * pBulkRootEdgeValue = &pContext->rgGcBulkRootEdges[pContext->cGcBulkRootEdges]; + pBulkRootEdgeValue->RootedNodeAddress = pRootedNode; + pBulkRootEdgeValue->GCRootKind = nRootKind; + pBulkRootEdgeValue->GCRootFlag = rootFlags; + pBulkRootEdgeValue->GCRootID = pvRootID; + pContext->cGcBulkRootEdges++; + + // If root edge buffer is now full, empty it into ETW + if (pContext->cGcBulkRootEdges == _countof(pContext->rgGcBulkRootEdges)) + { + FireEtwGCBulkRootEdge( + pContext->iCurBulkRootEdge, + pContext->cGcBulkRootEdges, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkRootEdges[0]), + &pContext->rgGcBulkRootEdges[0]); + + pContext->iCurBulkRootEdge++; + pContext->ClearRootEdges(); + } + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + + 
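+//---------------------------------------------------------------------------------------
+//
+// All of the bulk buffers above and below (root edges, conditional weak table element
+// edges, nodes, edges) follow the same batch-and-flush pattern: a fixed-size array whose
+// capacity keeps a full event under the 64K ETW payload limit, a count of filled
+// entries, and a per-event sequence number. A minimal sketch of that pattern (the names
+// here are hypothetical stand-ins, not the real event structs or Fire* functions):
+//
+//     struct BulkValue { void * Address; };                        // stand-in payload
+//     const UINT kMaxPerEvent = (cbMaxEtwEvent - 0x100) / sizeof(BulkValue);
+//
+//     UINT      iCurEvent = 0;         // sequence number; persists across flushes
+//     UINT      cValues   = 0;         // entries batched so far
+//     BulkValue rgValues[kMaxPerEvent];
+//
+//     void AddValue(const BulkValue & val)
+//     {
+//         rgValues[cValues++] = val;
+//         if (cValues == kMaxPerEvent)
+//         {
+//             // Buffer is full: fire one bulk event carrying the whole array, then
+//             // start the next batch.
+//             FireBulkEvent(iCurEvent, cValues, sizeof(rgValues[0]), &rgValues[0]);
+//             iCurEvent++;
+//             cValues = 0;
+//         }
+//     }
+//
+// EndHeapDump (below) flushes whatever partial batch remains when the heap walk ends.
+//---------------------------------------------------------------------------------------
+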
+//--------------------------------------------------------------------------------------- +// +// Called during a heap walk for each object reference encountered. Batches up the +// corresponding node, edges, and type data for the ETW events. +// +// Arguments: +// * profilerWalkHeapContext - The shared profapi/etw context built up during the heap walk. +// * pObjReferenceSource - Object doing the pointing +// * typeID - Type of pObjReferenceSource +// * fDependentHandle - nonzero iff this is for a dependent handle +// * cRefs - Count of objects being pointed to +// * rgObjReferenceTargets - Array of objects being pointed to +// + +// static +void ETW::GCLog::ObjectReference( + ProfilerWalkHeapContext * profilerWalkHeapContext, + Object * pObjReferenceSource, + ULONGLONG typeID, + ULONGLONG cRefs, + Object ** rgObjReferenceTargets) +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + + // LogTypeAndParametersIfNecessary can take a lock + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + EtwGcHeapDumpContext * pContext = + EtwGcHeapDumpContext::GetOrCreateInGCContext(&profilerWalkHeapContext->pvEtwContext); + if (pContext == NULL) + return; + + //--------------------------------------------------------------------------------------- + // GCBulkNode events + //--------------------------------------------------------------------------------------- + + // Add Node (pObjReferenceSource) to buffer + _ASSERTE(pContext->cGcBulkNodeValues < _countof(pContext->rgGcBulkNodeValues)); + EventStructGCBulkNodeValue * pBulkNodeValue = &pContext->rgGcBulkNodeValues[pContext->cGcBulkNodeValues]; + pBulkNodeValue->Address = pObjReferenceSource; + pBulkNodeValue->Size = pObjReferenceSource->GetSize(); + pBulkNodeValue->TypeID = typeID; + pBulkNodeValue->EdgeCount = cRefs; + pContext->cGcBulkNodeValues++; + + // If Node buffer is now full, empty it into ETW + if (pContext->cGcBulkNodeValues == _countof(pContext->rgGcBulkNodeValues)) + { + FireEtwGCBulkNode( + pContext->iCurBulkNodeEvent, + pContext->cGcBulkNodeValues, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkNodeValues[0]), + &pContext->rgGcBulkNodeValues[0]); + + pContext->iCurBulkNodeEvent++; + pContext->ClearNodes(); + } + + //--------------------------------------------------------------------------------------- + // BulkType events + //--------------------------------------------------------------------------------------- + + // We send type information as necessary--only for nodes, and only for nodes that we + // haven't already sent type info for + if (typeID != 0) + { + ETW::TypeSystemLog::LogTypeAndParametersIfNecessary( + &pContext->bulkTypeEventLogger, // Batch up this type with others to minimize events + typeID, + + // During heap walk, GC holds the lock for us, so we can directly enter the + // hash to see if the type has already been logged + ETW::TypeSystemLog::kTypeLogBehaviorAssumeLockAndLogIfFirstTime + ); + } + + //--------------------------------------------------------------------------------------- + // GCBulkEdge events + //--------------------------------------------------------------------------------------- + + // Add Edges (rgObjReferenceTargets) to buffer. Buffer could fill up before all edges + // are added (it could even fill up multiple times during this one call if there are + // a lot of edges), so empty Edge buffer into ETW as we go along, as many times as we + // need. 
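+    // (For example, an object with three references contributes one GCBulkNode entry
+    // with EdgeCount = 3 above, followed by three GCBulkEdge entries added here;
+    // consumers re-associate them by reading EdgeCount entries from the Edge stream
+    // per node, as described for GCBulkEdge above.)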
+ + for (ULONGLONG i=0; i < cRefs; i++) + { + _ASSERTE(pContext->cGcBulkEdgeValues < _countof(pContext->rgGcBulkEdgeValues)); + EventStructGCBulkEdgeValue * pBulkEdgeValue = &pContext->rgGcBulkEdgeValues[pContext->cGcBulkEdgeValues]; + pBulkEdgeValue->Value = rgObjReferenceTargets[i]; + // FUTURE: ReferencingFieldID + pBulkEdgeValue->ReferencingFieldID = 0; + pContext->cGcBulkEdgeValues++; + + // If Edge buffer is now full, empty it into ETW + if (pContext->cGcBulkEdgeValues == _countof(pContext->rgGcBulkEdgeValues)) + { + FireEtwGCBulkEdge( + pContext->iCurBulkEdgeEvent, + pContext->cGcBulkEdgeValues, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkEdgeValues[0]), + &pContext->rgGcBulkEdgeValues[0]); + + pContext->iCurBulkEdgeEvent++; + pContext->ClearEdges(); + } + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + +//--------------------------------------------------------------------------------------- +// +// Called by GC at end of heap dump to give us a convenient time to flush any remaining +// buffers of data to ETW +// +// Arguments: +// profilerWalkHeapContext - Context containing data we've batched up +// + +// static +void ETW::GCLog::EndHeapDump(ProfilerWalkHeapContext * profilerWalkHeapContext) +{ +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + LIMITED_METHOD_CONTRACT; + + // If context isn't already set up for us, then we haven't been collecting any data + // for ETW events. + EtwGcHeapDumpContext * pContext = (EtwGcHeapDumpContext *) profilerWalkHeapContext->pvEtwContext; + if (pContext == NULL) + return; + + // If the GC events are enabled, flush any remaining root, node, and / or edge data + if (ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_GCHEAPDUMP_KEYWORD)) + { + if (pContext->cGcBulkRootEdges > 0) + { + FireEtwGCBulkRootEdge( + pContext->iCurBulkRootEdge, + pContext->cGcBulkRootEdges, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkRootEdges[0]), + &pContext->rgGcBulkRootEdges[0]); + } + + if (pContext->cGCBulkRootConditionalWeakTableElementEdges > 0) + { + FireEtwGCBulkRootConditionalWeakTableElementEdge( + pContext->iCurBulkRootConditionalWeakTableElementEdge, + pContext->cGCBulkRootConditionalWeakTableElementEdges, + GetClrInstanceId(), + sizeof(pContext->rgGCBulkRootConditionalWeakTableElementEdges[0]), + &pContext->rgGCBulkRootConditionalWeakTableElementEdges[0]); + } + + if (pContext->cGcBulkNodeValues > 0) + { + FireEtwGCBulkNode( + pContext->iCurBulkNodeEvent, + pContext->cGcBulkNodeValues, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkNodeValues[0]), + &pContext->rgGcBulkNodeValues[0]); + } + + if (pContext->cGcBulkEdgeValues > 0) + { + FireEtwGCBulkEdge( + pContext->iCurBulkEdgeEvent, + pContext->cGcBulkEdgeValues, + GetClrInstanceId(), + sizeof(pContext->rgGcBulkEdgeValues[0]), + &pContext->rgGcBulkEdgeValues[0]); + } + } + + // Ditto for type events + if (ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + pContext->bulkTypeEventLogger.FireBulkTypeEvent(); + pContext->bulkTypeEventLogger.Cleanup(); + } + + // Delete any GC state built up in the context + profilerWalkHeapContext->pvEtwContext = NULL; + delete pContext; +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT +} + + +#ifndef FEATURE_REDHAWK + +//--------------------------------------------------------------------------------------- +// +// Helper to send public finalize object & type events, and private finalize object +// event. 
If Type events are enabled, this will send the Type event for the finalized +// objects. It will not be batched with other types (except type parameters, if any), +// and will not check if the Type has already been logged (may thus result in dupe +// logging of the Type). +// +// Arguments: +// pMT - MT of object getting finalized +// pObj - object getting finalized +// + +// static +void ETW::GCLog::SendFinalizeObjectEvent(MethodTable * pMT, Object * pObj) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + + // LogTypeAndParameters locks, and we take our own lock if typeLogBehavior says to + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + // Send public finalize object event, if it's enabled + if (ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, FinalizeObject)) + { + FireEtwFinalizeObject(pMT, pObj, GetClrInstanceId()); + + // This function checks if type events are enabled; if so, it sends event for + // finalized object's type (and parameter types, if any) + ETW::TypeSystemLog::LogTypeAndParametersIfNecessary( + NULL, // Not batching this type with others + (TADDR) pMT, + + // Don't spend the time entering the lock and checking the hash table to see + // if we've already logged the type; just log it (if type events are enabled). + ETW::TypeSystemLog::kTypeLogBehaviorAlwaysLog + ); + } + + // Send private finalize object event, if it's enabled + if (ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, PrvFinalizeObject)) + { + EX_TRY + { + DefineFullyQualifiedNameForClassWOnStack(); + FireEtwPrvFinalizeObject(pMT, pObj, GetClrInstanceId(), GetFullyQualifiedNameForClassNestedAwareW(pMT)); + } + EX_CATCH + { + } + EX_END_CATCH(RethrowCorruptingExceptions); + } +} + +DWORD ETW::ThreadLog::GetEtwThreadFlags(Thread * pThread) +{ + LIMITED_METHOD_CONTRACT; + + DWORD dwEtwThreadFlags = 0; + + if (pThread->IsThreadPoolThread()) + { + dwEtwThreadFlags |= kEtwThreadFlagThreadPoolWorker; + } + if (pThread->IsGCSpecial()) + { + dwEtwThreadFlags |= kEtwThreadFlagGCSpecial; + } + if (IsGarbageCollectorFullyInitialized() && + (pThread == GCHeapUtilities::GetGCHeap()->GetFinalizerThread())) + { + dwEtwThreadFlags |= kEtwThreadFlagFinalizer; + } + + return dwEtwThreadFlags; +} + +void ETW::ThreadLog::FireThreadCreated(Thread * pThread) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwThreadCreated( + (ULONGLONG)pThread, + (ULONGLONG)pThread->GetDomain(), + GetEtwThreadFlags(pThread), + pThread->GetThreadId(), + pThread->GetOSThreadId(), + GetClrInstanceId()); +} + +void ETW::ThreadLog::FireThreadDC(Thread * pThread) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwThreadDC( + (ULONGLONG)pThread, + (ULONGLONG)pThread->GetDomain(), + GetEtwThreadFlags(pThread), + pThread->GetThreadId(), + pThread->GetOSThreadId(), + GetClrInstanceId()); +} + + + +// TypeSystemLog implementation +// +// We keep track of which TypeHandles have been logged, and stats on instances of these +// TypeHandles that have been allocated, by a hash table of hash tables. The outer hash +// table maps Module*'s to an inner hash table that contains all the TypeLoggingInfos for that +// Module*. Arranging things this way makes it easy to deal with Module unloads, as we +// can simply remove the corresponding inner hash table from the outer hash table. + +// The following help define the "inner" hash table: a hash table of TypeLoggingInfos +// from a particular Module (key = TypeHandle, value = TypeLoggingInfo. 
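+//
+// Conceptually, a lookup walks the two levels like this (sketch only; the real code in
+// LookupOrCreateTypeLoggingInfo below goes through the SHash types defined next, handles
+// the not-yet-created cases, and must hold AllLoggedTypes::s_cs while doing so):
+//
+//     Module * pLoaderModule = th.GetLoaderModule();
+//     LoggedTypesFromModule * pInner =
+//         s_pAllLoggedTypes->allLoggedTypesHash.Lookup(pLoaderModule);
+//     TypeLoggingInfo info = pInner->loggedTypesFromModuleHash.Lookup(th);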
+ +class LoggedTypesFromModuleTraits : public NoRemoveSHashTraits< DefaultSHashTraits > +{ +public: + + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef NoRemoveSHashTraits< DefaultSHashTraits > PARENT; + typedef PARENT::element_t element_t; + typedef PARENT::count_t count_t; + + typedef TypeHandle key_t; + + static key_t GetKey(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return e.th; + } + + static BOOL Equals(key_t k1, key_t k2) + { + LIMITED_METHOD_CONTRACT; + return (k1 == k2); + } + + static count_t Hash(key_t k) + { + LIMITED_METHOD_CONTRACT; + return (count_t) k.AsTAddr(); + } + + static bool IsNull(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return (e.th.AsTAddr() == NULL); + } + + static const element_t Null() + { + LIMITED_METHOD_CONTRACT; + return ETW::TypeLoggingInfo(NULL); + } +}; +typedef SHash LoggedTypesFromModuleHash; + +// The inner hash table is housed inside this class, which acts as an entry in the outer +// hash table. +class ETW::LoggedTypesFromModule +{ +public: + Module * pModule; + LoggedTypesFromModuleHash loggedTypesFromModuleHash; + + // These are used by the outer hash table (mapping Module*'s to instances of + // LoggedTypesFromModule). + static COUNT_T Hash(Module * pModule) + { + LIMITED_METHOD_CONTRACT; + return (COUNT_T) (SIZE_T) pModule; + } + Module * GetKey() + { + LIMITED_METHOD_CONTRACT; + return pModule; + } + + LoggedTypesFromModule(Module * pModuleParam) : loggedTypesFromModuleHash() + { + LIMITED_METHOD_CONTRACT; + pModule = pModuleParam; + } + + ~LoggedTypesFromModule() + { + LIMITED_METHOD_CONTRACT; + } +}; + +// The following define the outer hash table (mapping Module*'s to instances of +// LoggedTypesFromModule). + +class AllLoggedTypesTraits : public DefaultSHashTraits +{ +public: + + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef DefaultSHashTraits PARENT; + typedef PARENT::element_t element_t; + typedef PARENT::count_t count_t; + + typedef Module * key_t; + + static key_t GetKey(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return e->pModule; + } + + static BOOL Equals(key_t k1, key_t k2) + { + LIMITED_METHOD_CONTRACT; + return (k1 == k2); + } + + static count_t Hash(key_t k) + { + LIMITED_METHOD_CONTRACT; + return (count_t) (size_t) k; + } + + static bool IsNull(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return (e == NULL); + } + + static const element_t Null() + { + LIMITED_METHOD_CONTRACT; + return NULL; + } +}; + +typedef SHash AllLoggedTypesHash; + +// The outer hash table (mapping Module*'s to instances of LoggedTypesFromModule) is +// housed in this struct, which is dynamically allocated the first time we decide we need +// it. +struct AllLoggedTypes +{ +public: + // This Crst protects the entire outer & inner hash tables. On a GC heap walk, it + // is entered once for the duration of the walk, so that we can freely access the + // hash tables during the walk. On each object allocation, this Crst must be + // entered individually each time. 
+ static CrstStatic s_cs; + + // The outer hash table (mapping Module*'s to instances of LoggedTypesFromModule) + AllLoggedTypesHash allLoggedTypesHash; +}; + + +CrstStatic AllLoggedTypes::s_cs; +AllLoggedTypes * ETW::TypeSystemLog::s_pAllLoggedTypes = NULL; +BOOL ETW::TypeSystemLog::s_fHeapAllocEventEnabledOnStartup = FALSE; +BOOL ETW::TypeSystemLog::s_fHeapAllocHighEventEnabledNow = FALSE; +BOOL ETW::TypeSystemLog::s_fHeapAllocLowEventEnabledNow = FALSE; +int ETW::TypeSystemLog::s_nCustomMsBetweenEvents = 0; + + +//--------------------------------------------------------------------------------------- +// +// Initializes TypeSystemLog (specifically its crst). Called just before ETW providers +// are registered with the OS +// +// Return Value: +// HRESULT indicating success or failure +// + +// static +HRESULT ETW::TypeSystemLog::PreRegistrationInit() +{ + LIMITED_METHOD_CONTRACT; + + if (!AllLoggedTypes::s_cs.InitNoThrow( + CrstEtwTypeLogHash, + CRST_UNSAFE_ANYMODE)) // This lock is taken during a GC while walking the heap + { + return E_FAIL; + } + + return S_OK; +} + +//--------------------------------------------------------------------------------------- +// +// Initializes TypeSystemLog (specifically its crst). Called just after ETW providers +// are registered with the OS +// +// Return Value: +// HRESULT indicating success or failure +// + +// static +void ETW::TypeSystemLog::PostRegistrationInit() +{ + LIMITED_METHOD_CONTRACT; + + // Initialize our "current state" BOOLs that remember if low or high allocation + // sampling is turned on + BOOL s_fHeapAllocLowEventEnabledNow = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, CLR_GCHEAPALLOCLOW_KEYWORD); + BOOL s_fHeapAllocHighEventEnabledNow = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, CLR_GCHEAPALLOCHIGH_KEYWORD); + + // Snapshot the current state of the object allocated keyword (on startup), and rely + // on this snapshot for the rest of the process run. Since these events require the + // slow alloc JIT helper to be enabled, and that can only be done on startup, we + // remember in this BOOL that we did so, so that we can prevent the object allocated + // event from being fired if the fast allocation helper were enabled but had to + // degrade down to the slow helper (e.g., thread ran over its allocation limit). This + // keeps things consistent. + s_fHeapAllocEventEnabledOnStartup = (s_fHeapAllocLowEventEnabledNow || s_fHeapAllocHighEventEnabledNow); + + if (s_fHeapAllocEventEnabledOnStartup) + { + // Determine if a COMPLUS env var is overriding the frequency for the sampled + // object allocated events + + // Config value intentionally typed as string, b/c DWORD intepretation is hard-coded + // to hex, which is not what the user would expect. This way I can force the + // conversion to use decimal. 
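+        //
+        // For example, a configured value of 10 (desired events per type per second)
+        // yields s_nCustomMsBetweenEvents = 1000 / 10 = 100 ms between sampled
+        // allocation events for any given type.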
+ NewArrayHolder wszCustomObjectAllocationEventsPerTypePerSec(NULL); + if (FAILED(CLRConfig::GetConfigValue( + CLRConfig::UNSUPPORTED_ETW_ObjectAllocationEventsPerTypePerSec, + &wszCustomObjectAllocationEventsPerTypePerSec)) || + (wszCustomObjectAllocationEventsPerTypePerSec == NULL)) + { + return; + } + LPWSTR endPtr; + DWORD dwCustomObjectAllocationEventsPerTypePerSec = wcstoul( + wszCustomObjectAllocationEventsPerTypePerSec, + &endPtr, + 10 // Base 10 conversion + ); + + if (dwCustomObjectAllocationEventsPerTypePerSec == ULONG_MAX) + dwCustomObjectAllocationEventsPerTypePerSec = 0; + if (dwCustomObjectAllocationEventsPerTypePerSec != 0) + { + // MsBetweenEvents = (1000 ms/sec) / (custom desired events/sec) + s_nCustomMsBetweenEvents = 1000 / dwCustomObjectAllocationEventsPerTypePerSec; + } + } +} + + +//--------------------------------------------------------------------------------------- +// +// Update object allocation sampling frequency and / or Type hash table contents based +// on what keywords were changed. +// + +// static +void ETW::TypeSystemLog::OnKeywordsChanged() +{ + LIMITED_METHOD_CONTRACT; + + // If the desired frequencey for the GCSampledObjectAllocation events has changed, + // update our state. + s_fHeapAllocLowEventEnabledNow = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, CLR_GCHEAPALLOCLOW_KEYWORD); + s_fHeapAllocHighEventEnabledNow = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, CLR_GCHEAPALLOCHIGH_KEYWORD); + + // FUTURE: Would be nice here to log an error event if (s_fHeapAllocLowEventEnabledNow || + // s_fHeapAllocHighEventEnabledNow), but !s_fHeapAllocEventEnabledOnStartup + + // If the type events should be turned off, eliminate the hash tables that tracked + // which types were logged. (If type events are turned back on later, we'll re-log + // them all as we encounter them.) Note that all we can really test for is that the + // Types keyword on the runtime provider is off. Not necessarily that it was on and + // was just turned off with this request. But either way, TypeSystemLog can handle it + // because it is extremely smart. + if (!ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, CLR_TYPE_KEYWORD)) + OnTypesKeywordTurnedOff(); +} + + +//--------------------------------------------------------------------------------------- +// +// Based on keywords alone, determine the what the default sampling rate should be for +// object allocation events. (This function does not consider any COMPLUS overrides for +// the sampling rate.) +// + +// static +int ETW::TypeSystemLog::GetDefaultMsBetweenEvents() +{ + LIMITED_METHOD_CONTRACT; + + // We should only get here if the allocation event is enabled. In spirit, this assert + // is correct, but a race could cause the assert to fire (if someone toggled the + // event off after we decided that the event was on and we started down the path of + // calculating statistics to fire the event). In such a case we'll end up returning + // k_nDefaultMsBetweenEventsLow below, but next time we won't get here as we'll know + // early enough not to fire the event. 
+ //_ASSERTE(IsHeapAllocEventEnabled()); + + // MsBetweenEvents = (1000 ms/sec) / (desired events/sec) + const int k_nDefaultMsBetweenEventsHigh = 1000 / 100; // 100 events per type per sec + const int k_nDefaultMsBetweenEventsLow = 1000 / 5; // 5 events per type per sec + + // If both are set, High takes precedence + if (s_fHeapAllocHighEventEnabledNow) + { + return k_nDefaultMsBetweenEventsHigh; + } + return k_nDefaultMsBetweenEventsLow; +} + +//--------------------------------------------------------------------------------------- +// +// Use this to decide whether to fire the object allocation event +// +// Return Value: +// nonzero iff we should fire the event. +// + +// static +BOOL ETW::TypeSystemLog::IsHeapAllocEventEnabled() +{ + LIMITED_METHOD_CONTRACT; + + return + // Only fire the event if it was enabled at startup (and thus the slow-JIT new + // helper is used in all cases) + s_fHeapAllocEventEnabledOnStartup && + + // AND a keyword is still enabled. (Thus people can turn off the event + // whenever they want; but they cannot turn it on unless it was also on at startup.) + (s_fHeapAllocHighEventEnabledNow || s_fHeapAllocLowEventEnabledNow); +} + +//--------------------------------------------------------------------------------------- +// +// Helper that adds (or updates) the TypeLoggingInfo inside the inner hash table passed +// in. +// +// Arguments: +// * pLoggedTypesFromModule - Inner hash table to update +// * pTypeLoggingInfo - TypeLoggingInfo to store +// +// Return Value: +// nonzero iff the add/replace was successful. +// +// Assumptions: +// Caller must be holding the hash crst +// + +// static +BOOL ETW::TypeSystemLog::AddOrReplaceTypeLoggingInfo(ETW::LoggedTypesFromModule * pLoggedTypesFromModule, const ETW::TypeLoggingInfo * pTypeLoggingInfo) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE(GetHashCrst()->OwnedByCurrentThread()); + _ASSERTE(pLoggedTypesFromModule != NULL); + + BOOL fSucceeded = FALSE; + EX_TRY + { + pLoggedTypesFromModule->loggedTypesFromModuleHash.AddOrReplace(*pTypeLoggingInfo); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + + return fSucceeded; +} + +//--------------------------------------------------------------------------------------- +// +// Records stats about the object's allocation, and determines based on those stats whether +// to fires the high / low frequency GCSampledObjectAllocation ETW event +// +// Arguments: +// * pObject - Allocated object to log +// * th - TypeHandle for the object +// + +// static +void ETW::TypeSystemLog::SendObjectAllocatedEvent(Object * pObject) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + } + CONTRACTL_END; + + // No-op if the appropriate keywords were not enabled on startup (or we're not yet + // started up) + if (!s_fHeapAllocEventEnabledOnStartup || !g_fEEStarted) + return; + + TypeHandle th = pObject->GetTypeHandle(); + + SIZE_T size = pObject->GetSize(); + if(size < MIN_OBJECT_SIZE) + { + size = PtrAlign(size); + } + + SIZE_T nTotalSizeForTypeSample = size; + DWORD dwTickNow = GetTickCount(); + DWORD dwObjectCountForTypeSample = 0; + + // BLOCK: Hold the crst around the type stats hash table while we read and update + // the type's stats + { + CrstHolder _crst(GetHashCrst()); + + // Get stats for type + TypeLoggingInfo typeLoggingInfo(NULL); + LoggedTypesFromModule * pLoggedTypesFromModule = NULL; + BOOL fCreatedNew = FALSE; + typeLoggingInfo = LookupOrCreateTypeLoggingInfo(th, &fCreatedNew, &pLoggedTypesFromModule); 
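+        // A null TypeHandle here means the lookup/creation failed (out of memory or a
+        // failed hash insertion), so skip ETW logging for this allocation.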
+ if (typeLoggingInfo.th.IsNull()) + return; + + // Update stats with current allocation + typeLoggingInfo.dwAllocsSkippedForSample++; + typeLoggingInfo.cbIgnoredSizeForSample += size; + + // This is our filter. If we should ignore this alloc, then record our updated + // our stats, and bail without sending the event. Note that we always log objects + // over 10K in size. + if (size < 10000 && typeLoggingInfo.dwAllocsSkippedForSample < typeLoggingInfo.dwAllocsToSkipPerSample) + { + // Update hash table's copy of type logging info with these values. Sucks that + // we're doing another hash table lookup here. Could instead have used LookupPtr() + // if it gave us back a non-const pointer, and then we could have updated in-place + AddOrReplaceTypeLoggingInfo(pLoggedTypesFromModule, &typeLoggingInfo); + if (fCreatedNew) + { + // Although we're skipping logging the allocation, we still need to log + // the type (so it's available for resolving future allocation events to + // their types). + // + // (See other call to LogTypeAndParametersIfNecessary further down for + // more comments.) + LogTypeAndParametersIfNecessary( + NULL, + th.AsTAddr(), + kTypeLogBehaviorAssumeLockAndAlwaysLogTopLevelType); + } + return; + } + + // Based on observed allocation stats, adjust our sampling rate for this type + + typeLoggingInfo.dwAllocCountInCurrentBucket += typeLoggingInfo.dwAllocsSkippedForSample; + int delta = (dwTickNow - typeLoggingInfo.dwTickOfCurrentTimeBucket) & 0x7FFFFFFF; // make wrap around work. + + int nMinAllocPerMSec = typeLoggingInfo.dwAllocCountInCurrentBucket / 16; // This is an underestimation of the true rate. + if (delta >= 16 || (nMinAllocPerMSec > 2 && nMinAllocPerMSec > typeLoggingInfo.flAllocPerMSec * 1.5F)) + { + float flNewAllocPerMSec = 0; + if (delta >= 16) + { + // This is the normal case, our allocation rate is under control with the current throttling. + flNewAllocPerMSec = ((float) typeLoggingInfo.dwAllocCountInCurrentBucket) / delta; + // Do a exponential decay window that is 5 * max(16, AllocationInterval) + typeLoggingInfo.flAllocPerMSec = 0.8F * typeLoggingInfo.flAllocPerMSec + 0.2F * flNewAllocPerMSec; + typeLoggingInfo.dwTickOfCurrentTimeBucket = dwTickNow; + typeLoggingInfo.dwAllocCountInCurrentBucket = 0; + } + else + { + flNewAllocPerMSec = (float) nMinAllocPerMSec; + // This means the second clause above is true, which means our sampling rate is too low + // so we need to throttle quickly. + typeLoggingInfo.flAllocPerMSec = flNewAllocPerMSec; + } + + + // Obey the desired sampling rate, but don't ignore > 1000 allocations per second + // per type + int nDesiredMsBetweenEvents = (s_nCustomMsBetweenEvents == 0) ? GetDefaultMsBetweenEvents() : s_nCustomMsBetweenEvents; + typeLoggingInfo.dwAllocsToSkipPerSample = min((int) (typeLoggingInfo.flAllocPerMSec * nDesiredMsBetweenEvents), 1000); + if (typeLoggingInfo.dwAllocsToSkipPerSample == 1) + typeLoggingInfo.dwAllocsToSkipPerSample = 0; + } + + // We're logging this sample, so save the values we need into locals, and reset + // our counts for the next sample. 
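+        // (For example, a type measured at 2 allocs/ms with a desired 100 ms between
+        // events skips min(2 * 100, 1000) = 200 allocations before its next sample;
+        // the locals saved here feed the GCSampledObjectAllocation event fired below,
+        // after the crst is released.)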
+ nTotalSizeForTypeSample = typeLoggingInfo.cbIgnoredSizeForSample; + dwObjectCountForTypeSample = typeLoggingInfo.dwAllocsSkippedForSample; + typeLoggingInfo.cbIgnoredSizeForSample = 0; + typeLoggingInfo.dwAllocsSkippedForSample = 0; + + // Save updated stats into hash table + if (!AddOrReplaceTypeLoggingInfo(pLoggedTypesFromModule, &typeLoggingInfo)) + { + return; + } + + // While we're still holding the crst, optionally log any relevant Types now (we may need + // to reconsult the hash in here if there are any type parameters, though we can + // optimize and NOT consult the hash for th itself). + if (fCreatedNew) + { + // We were the ones to add the Type to the hash. So it wasn't there before, + // which means it hasn't been logged yet. + LogTypeAndParametersIfNecessary( + + // No BulkTypeEventLogger, as we're not batching during a GC heap walk + NULL, + + th.AsTAddr(), + + // We've determined the type is not yet logged, so no need to check + kTypeLogBehaviorAssumeLockAndAlwaysLogTopLevelType); + } + } // RELEASE: CrstHolder _crst(GetHashCrst()); + + // Now log the allocation + if (s_fHeapAllocHighEventEnabledNow) + { + FireEtwGCSampledObjectAllocationHigh(pObject, (LPVOID) th.AsTAddr(), dwObjectCountForTypeSample, nTotalSizeForTypeSample, GetClrInstanceId()); + } + else + { + FireEtwGCSampledObjectAllocationLow(pObject, (LPVOID) th.AsTAddr(), dwObjectCountForTypeSample, nTotalSizeForTypeSample, GetClrInstanceId()); + } +} + +//--------------------------------------------------------------------------------------- +// +// Accessor for hash table crst +// +// Return Value: +// hash table crst +// + +// static +CrstBase * ETW::TypeSystemLog::GetHashCrst() +{ + LIMITED_METHOD_CONTRACT; + return &AllLoggedTypes::s_cs; +} + +//--------------------------------------------------------------------------------------- +// +// Outermost level of ETW-type-logging. Clients outside eventtrace.cpp call this to log +// a TypeHandle and (recursively) its type parameters when present. This guy then calls +// into the appropriate BulkTypeEventLogger to do the batching and logging +// +// Arguments: +// * pBulkTypeEventLogger - If our caller is keeping track of batched types, it +// passes this to us so we can use it to batch the current type (GC heap walk +// does this). If this is NULL, no batching is going on (e.g., we're called on +// object allocation, not a GC heal walk), in which case we create our own +// temporary BulkTypeEventLogger. +// * thAsAddr - TypeHandle to batch +// * typeLogBehavior - Optimization to tell us we don't need to enter the +// TypeSystemLog's crst, as the TypeSystemLog's hash table is already protected +// by a prior acquisition of the crst by our caller. (Or that we don't even +// need to check the hash in the first place.) +// + +// static +void ETW::TypeSystemLog::LogTypeAndParametersIfNecessary(BulkTypeEventLogger * pLogger, ULONGLONG thAsAddr, TypeLogBehavior typeLogBehavior) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + + // LogTypeAndParameters locks, and we take our own lock if typeLogBehavior says to + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + if (!ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + return; + } + + TypeHandle th = TypeHandle::FromTAddr((TADDR) thAsAddr); + if (!th.IsRestored()) + { + return; + } + + // Check to see if we've already logged this type. If so, bail immediately. 
+ // Otherwise, mark that it's getting logged (by adding it to the hash), and fall + // through to the logging code below. If caller doesn't care, then don't even + // check; just log the type + BOOL fShouldLogType = ((typeLogBehavior == kTypeLogBehaviorAlwaysLog) || + (typeLogBehavior == kTypeLogBehaviorAssumeLockAndAlwaysLogTopLevelType)) ? + TRUE : + ((typeLogBehavior == kTypeLogBehaviorTakeLockAndLogIfFirstTime) ? + ShouldLogType(th) : + ShouldLogTypeNoLock(th)); + if (!fShouldLogType) + return; + + if (pLogger == NULL) + { + // We're not batching this type against previous types (e.g., we're being called + // on object allocate instead of a GC heap walk). So create a temporary logger + // on the stack. If there are generic parameters that need to be logged, then + // at least they'll get batched together with the type + BulkTypeEventLogger logger; + logger.LogTypeAndParameters(thAsAddr, typeLogBehavior); + + // Since this logger isn't being used to batch anything else, flush what we have + logger.FireBulkTypeEvent(); + } + else + { + // We are batching this type with others (e.g., we're being called at the end of + // a GC on a heap walk). So use the logger our caller set up for us. + pLogger->LogTypeAndParameters(thAsAddr, typeLogBehavior); + } +} + + +//--------------------------------------------------------------------------------------- +// +// Same as code:ETW::TypeSystemLog::ShouldLogTypeNoLock but acquires the lock first. + +// static +BOOL ETW::TypeSystemLog::ShouldLogType(TypeHandle th) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + CrstHolder _crst(GetHashCrst()); + return ShouldLogTypeNoLock(th); +} + + +//--------------------------------------------------------------------------------------- +// +// Ask hash table if we've already logged the type, without first acquiring the lock +// (our caller already did this). As a side-effect, a TypeLoggingInfo will be created +// for this type (so future calls to this function will return FALSE to avoid dupe type +// logging). +// +// Arguments: +// pth - TypeHandle to query +// +// Return Value: +// nonzero iff type should be logged (i.e., not previously logged) +// +// Assumptions: +// Caller must own the hash table's crst +// + +// static +BOOL ETW::TypeSystemLog::ShouldLogTypeNoLock(TypeHandle th) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE(GetHashCrst()->OwnedByCurrentThread()); + + // Check to see if TypeLoggingInfo exists yet for *pth. If not, creates one and + // adds it to the hash. + BOOL fCreatedNew = FALSE; + LookupOrCreateTypeLoggingInfo(th, &fCreatedNew); + + // Return whether we had to create the TypeLoggingInfo (indicating it was not yet in + // the hash, and thus that we hadn't yet logged the type). + return fCreatedNew; +} + + +//--------------------------------------------------------------------------------------- +// +// Helper that returns (creating if necessary) the TypeLoggingInfo in the hash table +// corresponding with the specified TypeHandle +// +// Arguments: +// * th - Key to lookup the TypeLoggingInfo +// * pfCreatedNew - [out] Points to nonzero iff a new TypeLoggingInfo was created +// (i.e., none existed yet in the hash for th). +// * ppLoggedTypesFromModule - [out] Points to the inner hash that was used to do +// the lookup. (An otpimization so the caller doesn't have to find this again, +// if it needs to do further operations on it.) +// +// Return Value: +// TypeLoggingInfo found or created. 
+// +// Assumptions: +// Hash crst must be held by caller +// + +// static +ETW::TypeLoggingInfo ETW::TypeSystemLog::LookupOrCreateTypeLoggingInfo(TypeHandle th, BOOL * pfCreatedNew, LoggedTypesFromModule ** ppLoggedTypesFromModule /* = NULL */) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE(pfCreatedNew != NULL); + _ASSERTE(GetHashCrst()->OwnedByCurrentThread()); + + if (ppLoggedTypesFromModule != NULL) + { + *ppLoggedTypesFromModule = NULL; + } + + BOOL fSucceeded = FALSE; + + if (s_pAllLoggedTypes == NULL) + { + s_pAllLoggedTypes = new (nothrow) AllLoggedTypes; + if (s_pAllLoggedTypes == NULL) + { + // out of memory. Bail on ETW stuff + *pfCreatedNew = FALSE; + return TypeLoggingInfo(NULL); + } + } + + // Step 1: go from LoaderModule to hash of types. + + Module * pLoaderModule = th.GetLoaderModule(); + _ASSERTE(pLoaderModule != NULL); + LoggedTypesFromModule * pLoggedTypesFromModule = s_pAllLoggedTypes->allLoggedTypesHash.Lookup(pLoaderModule); + if (pLoggedTypesFromModule == NULL) + { + pLoggedTypesFromModule = new (nothrow) LoggedTypesFromModule(pLoaderModule); + if (pLoggedTypesFromModule == NULL) + { + // out of memory. Bail on ETW stuff + *pfCreatedNew = FALSE; + return TypeLoggingInfo(NULL); + } + + fSucceeded = FALSE; + EX_TRY + { + s_pAllLoggedTypes->allLoggedTypesHash.Add(pLoggedTypesFromModule); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + { + *pfCreatedNew = FALSE; + return TypeLoggingInfo(NULL); + } + } + + if (ppLoggedTypesFromModule != NULL) + { + *ppLoggedTypesFromModule = pLoggedTypesFromModule; + } + + // Step 2: From hash of types, see if our TypeHandle is there already + TypeLoggingInfo typeLoggingInfoPreexisting = pLoggedTypesFromModule->loggedTypesFromModuleHash.Lookup(th); + if (!typeLoggingInfoPreexisting.th.IsNull()) + { + // Type is already hashed, so it's already logged, so we don't need to + // log it again. + *pfCreatedNew = FALSE; + return typeLoggingInfoPreexisting; + } + + // We haven't logged this type, so we need to continue with this function to + // log it below. Add it to the hash table first so any recursive calls will + // see that this type is already being taken care of + fSucceeded = FALSE; + TypeLoggingInfo typeLoggingInfoNew(th); + EX_TRY + { + pLoggedTypesFromModule->loggedTypesFromModuleHash.Add(typeLoggingInfoNew); + fSucceeded = TRUE; + } + EX_CATCH + { + fSucceeded = FALSE; + } + EX_END_CATCH(RethrowCorruptingExceptions); + if (!fSucceeded) + { + *pfCreatedNew = FALSE; + return TypeLoggingInfo(NULL); + } + + *pfCreatedNew = TRUE; + return typeLoggingInfoNew; +} + + +//--------------------------------------------------------------------------------------- +// +// Called when we determine if a module was unloaded, so we can clear out that module's +// set of types from our hash table +// +// Arguments: +// pModule - Module getting unloaded +// + +// static +void ETW::TypeSystemLog::OnModuleUnload(Module * pModule) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + if (!ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + return; + } + + LoggedTypesFromModule * pLoggedTypesFromModule = NULL; + + { + CrstHolder _crst(GetHashCrst()); + + if (s_pAllLoggedTypes == NULL) + return; + + // Is there a TypesHash for this module? 
+ pLoggedTypesFromModule = s_pAllLoggedTypes->allLoggedTypesHash.Lookup(pModule); + if (pLoggedTypesFromModule == NULL) + return; + + // Remove TypesHash from master hash mapping modules to their TypesHash + s_pAllLoggedTypes->allLoggedTypesHash.Remove(pModule); + } + + // Destruct this TypesHash we just removed + delete pLoggedTypesFromModule; + pLoggedTypesFromModule = NULL; +} + +//--------------------------------------------------------------------------------------- +// +// Whenever we detect that the Types keyword is off, this gets called. This eliminates the +// hash tables that tracked which types were logged (if the hash tables had been created +// previously). If type events are turned back on later, we'll re-log them all as we +// encounter them. +// + +// static +void ETW::TypeSystemLog::OnTypesKeywordTurnedOff() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; + } + CONTRACTL_END; + + CrstHolder _crst(GetHashCrst()); + + if (s_pAllLoggedTypes == NULL) + return; + + // Destruct each of the per-module TypesHashes + AllLoggedTypesHash * pLoggedTypesHash = &s_pAllLoggedTypes->allLoggedTypesHash; + for (AllLoggedTypesHash::Iterator iter = pLoggedTypesHash->Begin(); + iter != pLoggedTypesHash->End(); + ++iter) + { + LoggedTypesFromModule * pLoggedTypesFromModule = *iter; + delete pLoggedTypesFromModule; + } + + // This causes the default ~AllLoggedTypes() to be called, and thus + // ~AllLoggedTypesHash() to be called + delete s_pAllLoggedTypes; + s_pAllLoggedTypes = NULL; +} + + +/****************************************************************************/ +/* Called when ETW is turned ON on an existing process and ModuleRange events are to + be fired */ +/****************************************************************************/ +void ETW::EnumerationLog::ModuleRangeRundown() +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if (ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_PERFTRACK_PRIVATE_KEYWORD)) + { + ETW::EnumerationLog::EnumerationHelper(NULL, NULL, ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoadPrivate); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/* Called when ETW is turned ON on an existing process */ +/****************************************************************************/ +void ETW::EnumerationLog::StartRundown() +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + BOOL bIsArmRundownEnabled = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNAPPDOMAINRESOURCEMANAGEMENT_KEYWORD); + BOOL bIsPerfTrackRundownEnabled = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNPERFTRACK_KEYWORD); + BOOL bIsThreadingRundownEnabled = ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNTHREADING_KEYWORD); + + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJIT_KEYWORD) + || + ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNLOADER_KEYWORD) + || + IsRundownNgenKeywordEnabledAndNotSuppressed() + || + 
ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJITTEDMETHODILTONATIVEMAP_KEYWORD) + || + bIsArmRundownEnabled + || + bIsPerfTrackRundownEnabled + || + bIsThreadingRundownEnabled) + { + // begin marker event will go to the rundown provider + FireEtwDCStartInit_V1(GetClrInstanceId()); + + // The rundown flag is expected to be checked in the caller, so no need to check here again + DWORD enumerationOptions=ETW::EnumerationLog::EnumerationStructs::None; + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNLOADER_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart; + } + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJIT_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::JitMethodDCStart; + } + if(IsRundownNgenKeywordEnabledAndNotSuppressed()) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::NgenMethodDCStart; + } + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJITTEDMETHODILTONATIVEMAP_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::MethodDCStartILToNativeMap; + } + if(bIsPerfTrackRundownEnabled) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCStart; + } + + ETW::EnumerationLog::EnumerationHelper(NULL, NULL, enumerationOptions); + + if (bIsArmRundownEnabled) + { + // When an ETW event consumer asks for ARM rundown, that not only enables + // the ETW events, but also causes some minor behavioral changes in the + // CLR, such as gathering CPU usage baselines for each thread right now, + // and also gathering resource usage information later on (keyed off of + // g_fEnableARM, which we'll set right now). 
+ EnableARM(); + } + + if (bIsArmRundownEnabled || bIsThreadingRundownEnabled) + { + SendThreadRundownEvent(); + } + + // end marker event will go to the rundown provider + FireEtwDCStartComplete_V1(GetClrInstanceId()); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +//--------------------------------------------------------------------------------------- +// +// Simple helper to convert the currently active keywords on the runtime provider into a +// bitmask of enumeration options as defined in ETW::EnumerationLog::EnumerationStructs +// +// Return Value: +// ETW::EnumerationLog::EnumerationStructs bitmask corresponding to the currently +// active keywords on the runtime provider +// + +// static +DWORD ETW::EnumerationLog::GetEnumerationOptionsFromRuntimeKeywords() +{ + LIMITED_METHOD_CONTRACT; + + DWORD enumerationOptions=ETW::EnumerationLog::EnumerationStructs::None; + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_LOADER_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload; + } + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_JIT_KEYWORD) && + ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_ENDENUMERATION_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::JitMethodUnload; + } + if(IsRuntimeNgenKeywordEnabledAndNotSuppressed() && + ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_ENDENUMERATION_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::NgenMethodUnload; + } + + return enumerationOptions; +} + +//--------------------------------------------------------------------------------------- +// +// Executes a flavor of rundown initiated by a CAPTURE_STATE request to +// code:#EtwCallback. CAPTURE_STATE is the "ETW-sanctioned" way of performing a +// rundown, whereas the CLR's rundown provider was *our* version of this, implemented +// before CAPTURE_STATE was standardized. +// +// When doing a CAPTURE_STATE, the CLR rundown provider is completely unused. Instead, +// we pay attention to the runtime keywords active at the time the CAPTURE_STATE was +// requested, and enumerate through the appropriate objects (AppDomains, assemblies, +// modules, types, methods, threads) and send runtime events for each of them. +// +// CAPTURE_STATE is intended to be used primarily by PerfTrack. Implementing this form +// of rundown allows PerfTrack to be blissfully unaware of the CLR's rundown provider. +// + +// static +void ETW::EnumerationLog::EnumerateForCaptureState() +{ + CONTRACTL + { + NOTHROW; + GC_TRIGGERS; + } + CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, KEYWORDZERO)) + { + DWORD enumerationOptions = GetEnumerationOptionsFromRuntimeKeywords(); + + // Send unload events for all remaining domains, including shared domain and + // default domain. 
+ ETW::EnumerationLog::EnumerationHelper(NULL /* module filter */, NULL /* domain filter */, enumerationOptions); + + // Send thread created events for all currently active threads, if requested + if (ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_THREADING_KEYWORD)) + { + SendThreadRundownEvent(); + } + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/**************************************************************************************/ +/* Called when ETW is turned OFF on an existing process .Will be used by the controller for end rundown*/ +/**************************************************************************************/ +void ETW::EnumerationLog::EndRundown() +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + BOOL bIsPerfTrackRundownEnabled = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNPERFTRACK_KEYWORD); + BOOL bIsThreadingRundownEnabled = ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNTHREADING_KEYWORD); + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJIT_KEYWORD) + || + ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNLOADER_KEYWORD) + || + IsRundownNgenKeywordEnabledAndNotSuppressed() + || + ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJITTEDMETHODILTONATIVEMAP_KEYWORD) + || + bIsPerfTrackRundownEnabled + || + bIsThreadingRundownEnabled + ) + { + // begin marker event will go to the rundown provider + FireEtwDCEndInit_V1(GetClrInstanceId()); + + // The rundown flag is expected to be checked in the caller, so no need to check here again + DWORD enumerationOptions=ETW::EnumerationLog::EnumerationStructs::None; + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNLOADER_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd; + } + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJIT_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::JitMethodDCEnd; + } + if(IsRundownNgenKeywordEnabledAndNotSuppressed()) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::NgenMethodDCEnd; + } + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNJITTEDMETHODILTONATIVEMAP_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::MethodDCEndILToNativeMap; + } + if(bIsPerfTrackRundownEnabled) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCEnd; + } + + ETW::EnumerationLog::EnumerationHelper(NULL, NULL, enumerationOptions); + + if (bIsThreadingRundownEnabled) + { + SendThreadRundownEvent(); + } + + // end marker event will go to the rundown provider + FireEtwDCEndComplete_V1(GetClrInstanceId()); + } + } EX_CATCH { + STRESS_LOG1(LF_ALWAYS, LL_ERROR, "Exception during Rundown Enumeration, EIP of last AV = %p", g_LastAccessViolationEIP); + } EX_END_CATCH(SwallowAllExceptions); +} + +// #Registration +/*++ + 
+Routine Description: + + Registers provider with ETW tracing framework. + This function should not be called more than once, on + Dll Process attach only. + Not thread safe. + +Arguments: + none + +Return Value: + Returns the return value from RegisterTraceGuids or EventRegister. + +--*/ + +void InitializeEventTracing() +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + MODE_ANY; + } + CONTRACTL_END; + + // Do startup-only initialization of any state required by the ETW classes before + // events can be fired + HRESULT hr = ETW::TypeSystemLog::PreRegistrationInit(); + if (FAILED(hr)) + return; + + // Register CLR providers with the OS + if (g_pEtwTracer == NULL) + { + NewHolder tempEtwTracer (new (nothrow) ETW::CEtwTracer()); + if (tempEtwTracer != NULL && tempEtwTracer->Register () == ERROR_SUCCESS) + g_pEtwTracer = tempEtwTracer.Extract (); + } + + g_nClrInstanceId = GetRuntimeId() & 0x0000FFFF; // This will give us duplicate ClrInstanceId after UINT16_MAX + + // Any classes that need some initialization to happen after we've registered the + // providers can do so now + ETW::TypeSystemLog::PostRegistrationInit(); +} + +HRESULT ETW::CEtwTracer::Register() +{ + WRAPPER_NO_CONTRACT; + + OSVERSIONINFO osVer; + osVer.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + + if (GetOSVersion(&osVer) == FALSE) { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + else if (osVer.dwMajorVersion < ETW_SUPPORTED_MAJORVER) { + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + } + + // if running on OS < Longhorn, skip registration unless reg key is set + // since ETW reg is expensive (in both time and working set) on older OSes + if (osVer.dwMajorVersion < ETW_ENABLED_MAJORVER && !g_fEnableETW && !CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_PreVistaETWEnabled)) + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + // If running on OS >= Longhorn, skip registration if ETW is not enabled + if (osVer.dwMajorVersion >= ETW_ENABLED_MAJORVER && !g_fEnableETW && !CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_VistaAndAboveETWEnabled)) + return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); + + EventRegisterMicrosoft_Windows_DotNETRuntime(); + EventRegisterMicrosoft_Windows_DotNETRuntimePrivate(); + EventRegisterMicrosoft_Windows_DotNETRuntimeRundown(); + + // Stress Log ETW events are available only on the desktop version of the runtime +#ifndef FEATURE_CORECLR + EventRegisterMicrosoft_Windows_DotNETRuntimeStress(); +#endif // !FEATURE_CORECLR + + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_DotNETRuntimeHandle; + MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_DotNETRuntimePrivateHandle; + MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_DotNETRuntimeRundownHandle; +#ifndef FEATURE_CORECLR + MICROSOFT_WINDOWS_DOTNETRUNTIME_STRESS_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_DotNETRuntimeStressHandle; +#endif // !FEATURE_CORECLR + + return S_OK; +} + +// #Unregistration +/*++ + +Routine Description: + Unregisters the provider from ETW. This function + should only be called once from DllMain Detach process. + Not thread safe. 
+ +Arguments: + none + +Return Value: + Returns ERROR_SUCCESS + +--*/ +HRESULT ETW::CEtwTracer::UnRegister() +{ + LIMITED_METHOD_CONTRACT; + + EventUnregisterMicrosoft_Windows_DotNETRuntime(); + EventUnregisterMicrosoft_Windows_DotNETRuntimePrivate(); + EventUnregisterMicrosoft_Windows_DotNETRuntimeRundown(); +#ifndef FEATURE_CORECLR + EventUnregisterMicrosoft_Windows_DotNETRuntimeStress(); +#endif // !FEATURE_CORECLR + return S_OK; +} + +extern "C" +{ + ETW_INLINE + void EtwCallout(REGHANDLE RegHandle, + PCEVENT_DESCRIPTOR Descriptor, + ULONG ArgumentCount, + PEVENT_DATA_DESCRIPTOR EventData) + { + WRAPPER_NO_CONTRACT; + UINT8 providerIndex = 0; + if(RegHandle == Microsoft_Windows_DotNETRuntimeHandle) { + providerIndex = 0; + } else if(RegHandle == Microsoft_Windows_DotNETRuntimeRundownHandle) { + providerIndex = 1; + } else if(RegHandle == Microsoft_Windows_DotNETRuntimeStressHandle) { + providerIndex = 2; + } else if(RegHandle == Microsoft_Windows_DotNETRuntimePrivateHandle) { + providerIndex = 3; + } else { + _ASSERTE(!"Provider not one of Runtime, Rundown, Private and Stress"); + return; + } + + // stacks are supposed to be fired for only the events with a bit set in the etwStackSupportedEvents bitmap + if(((etwStackSupportedEvents[providerIndex][Descriptor->Id/8]) & + (1<<(Descriptor->Id%8))) != 0) + { + if(RegHandle == Microsoft_Windows_DotNETRuntimeHandle) { + ETW::SamplingLog::SendStackTrace(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, &CLRStackWalk, &CLRStackId); + } else if(RegHandle == Microsoft_Windows_DotNETRuntimeRundownHandle) { + ETW::SamplingLog::SendStackTrace(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, &CLRStackWalkDCStart, &CLRStackRundownId); + } else if(RegHandle == Microsoft_Windows_DotNETRuntimePrivateHandle) { + ETW::SamplingLog::SendStackTrace(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, &CLRStackWalkPrivate, &CLRStackPrivateId); + } else if(RegHandle == Microsoft_Windows_DotNETRuntimeStressHandle) { + ETW::SamplingLog::SendStackTrace(MICROSOFT_WINDOWS_DOTNETRUNTIME_STRESS_PROVIDER_Context, &CLRStackWalkStress, &CLRStackStressId); + } + } + } +} + +extern "C" +{ + + // #EtwCallback: + // During the build, MC generates the code to register our provider, and to register + // our ETW callback. (This is buried under Intermediates, in a path like + // Intermediate\clr\corguids.nativeproj_1723354836\obj1c\x86\ClrEtwAll.h.) The ETW + // callback is also generated for us by MC. But we can hook into this generated + // callback by #defining MCGEN_PRIVATE_ENABLE_CALLBACK_V2 to be a call to this + // function (EtwCallback), thus causing EtwCallback to get called after the + // MC-generated code executes. + // + // This callback function is called whenever an ETW session is enabled or disabled. A + // callback function needs to be specified when the provider is registered. C style + // callback wrappers are needed during event registration. To handle the callback + // action in this class, we pass "this" during provider registration and modify the + // context to the relevant context in the C callback later. 
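+    // As a rough illustration of that hook (a sketch only; the macro name comes from the
+    // MC-generated header, and the parameter list shown here simply mirrors EtwCallback's
+    // parameters below):
+    //
+    //     #define MCGEN_PRIVATE_ENABLE_CALLBACK_V2(SourceId, ControlCode, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext) \
+    //         EtwCallback(SourceId, ControlCode, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext)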
+    ETW_INLINE
+    void EtwCallback(
+        _In_ LPCGUID SourceId,
+        _In_ ULONG ControlCode,
+        _In_ UCHAR Level,
+        _In_ ULONGLONG MatchAnyKeyword,
+        _In_ ULONGLONG MatchAllKeyword,
+        _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData,
+        _Inout_opt_ PVOID CallbackContext)
+    {
+        CONTRACTL {
+            NOTHROW;
+            if(g_fEEStarted) {GC_TRIGGERS;} else {DISABLED(GC_NOTRIGGER);};
+            MODE_ANY;
+            CAN_TAKE_LOCK;
+            STATIC_CONTRACT_FAULT;
+            SO_NOT_MAINLINE;
+        } CONTRACTL_END;
+
+        // Mark that we are the special ETWRundown thread. Currently all this does
+        // is ensure that AVs thrown in this thread are treated as normal exceptions.
+        // This allows us to catch and swallow them. We can do this because we have
+        // a reasonably strong belief that doing ETW Rundown does not change runtime state,
+        // and thus, if an AV happens, it is better to simply give up logging ETW than
+        // to terminate the process (which is what we would do normally).
+        ClrFlsThreadTypeSwitch etwRundownThreadHolder(ThreadType_ETWRundownThread);
+        PMCGEN_TRACE_CONTEXT context = (PMCGEN_TRACE_CONTEXT)CallbackContext;
+
+        BOOLEAN bIsPublicTraceHandle =
+#ifdef WINXP_AND_WIN2K3_BUILD_SUPPORT
+            McGenPreVista ? ((ULONGLONG)Microsoft_Windows_DotNETRuntimeHandle==(ULONGLONG)context) :
+#endif
+            (context->RegistrationHandle==Microsoft_Windows_DotNETRuntimeHandle);
+
+        BOOLEAN bIsPrivateTraceHandle =
+#ifdef WINXP_AND_WIN2K3_BUILD_SUPPORT
+            McGenPreVista ? ((ULONGLONG)Microsoft_Windows_DotNETRuntimePrivateHandle==(ULONGLONG)context) :
+#endif
+            (context->RegistrationHandle==Microsoft_Windows_DotNETRuntimePrivateHandle);
+
+        BOOLEAN bIsRundownTraceHandle =
+#ifdef WINXP_AND_WIN2K3_BUILD_SUPPORT
+            McGenPreVista ? ((ULONGLONG)Microsoft_Windows_DotNETRuntimeRundownHandle==(ULONGLONG)context) :
+#endif
+            (context->RegistrationHandle==Microsoft_Windows_DotNETRuntimeRundownHandle);
+
+
+        // A manifest-based provider can be enabled to multiple event tracing sessions.
+        // As long as there is at least 1 enabled session, IsEnabled will be TRUE.
+        // Since classic providers can be enabled to only a single session,
+        // IsEnabled will be TRUE when it is enabled and FALSE when disabled.
+        BOOL bEnabled =
+            ((ControlCode == EVENT_CONTROL_CODE_ENABLE_PROVIDER) ||
+             (ControlCode == EVENT_CONTROL_CODE_CAPTURE_STATE));
+        if(bEnabled)
+        {
+            // TypeSystemLog needs a notification when certain keywords are modified, so
+            // give it a hook here.
+ if (g_fEEStarted && !g_fEEShutDown && bIsPublicTraceHandle) + { + ETW::TypeSystemLog::OnKeywordsChanged(); + } + + if (bIsPrivateTraceHandle) + { + ETW::GCLog::GCSettingsEvent(); + if(g_fEEStarted && !g_fEEShutDown) + { + ETW::EnumerationLog::ModuleRangeRundown(); + } + } + +#ifdef _WIN64 // We only do this on 64 bit (NOT ARM, because ARM uses frame based stack crawling) + // If we have turned on the JIT keyword to the VERBOSE setting (needed to get JIT names) then + // we assume that we also want good stack traces so we need to publish unwind information so + // ETW can get at it + if(bIsPublicTraceHandle && ETW_CATEGORY_ENABLED((*context), TRACE_LEVEL_VERBOSE, CLR_RUNDOWNJIT_KEYWORD)) + UnwindInfoTable::PublishUnwindInfo(g_fEEStarted != FALSE); +#endif + if(g_fEEStarted && !g_fEEShutDown && bIsRundownTraceHandle) + { + // Fire the runtime information event + ETW::InfoLog::RuntimeInformation(ETW::InfoLog::InfoStructs::Callback); + + // Start and End Method/Module Rundowns + // Used to fire events that we missed since we started the controller after the process started + // flags for immediate start rundown + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNSTART_KEYWORD)) + ETW::EnumerationLog::StartRundown(); + + // flags delayed end rundown + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_RUNDOWNEND_KEYWORD)) + ETW::EnumerationLog::EndRundown(); + } + + if (g_fEEStarted && !g_fEEShutDown && (ControlCode == EVENT_CONTROL_CODE_CAPTURE_STATE)) + { + ETW::EnumerationLog::EnumerateForCaptureState(); + } + + // Special check for the runtime provider's GCHeapCollectKeyword. Profilers + // flick this to force a full GC. + if (g_fEEStarted && !g_fEEShutDown && bIsPublicTraceHandle && + ((MatchAnyKeyword & CLR_GCHEAPCOLLECT_KEYWORD) != 0)) + { + // Profilers may (optionally) specify extra data in the filter parameter + // to log with the GCStart event. + LONGLONG l64ClientSequenceNumber = 0; + if ((FilterData != NULL) && + (FilterData->Type == 1) && + (FilterData->Size == sizeof(l64ClientSequenceNumber))) + { + l64ClientSequenceNumber = *(LONGLONG *) (FilterData->Ptr); + } + ETW::GCLog::ForceGC(l64ClientSequenceNumber); + } + } +#ifdef FEATURE_COMINTEROP + if (ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, CCWRefCountChange)) + g_pConfig->SetLogCCWRefCountChangeEnabled(bEnabled != 0); +#endif // FEATURE_COMINTEROP + + } +} + +#endif // FEATURE_REDHAWK +#else // !FEATURE_DTRACE + +/**************************************************************************************/ +/* Helper data structure for supporting string in Dtrace probes. Since Dtrace does not support Unicode */ +/* in its printf API, we cast the unicode string to UFT8 string and then output them. 
*/ +/**************************************************************************************/ +#define DTRACE_OUTPUT_STRING_LEN 512 +const CHAR szDtraceOutputNULL[]="NULL"; +INT32 WideCharToMultiByte(LPCWSTR wszSrcStr, LPSTR szDstStr); + +#include + +// The possible value of COMPlus_ETWEnabled should be '0' or '1' +#define SIZE_ETWEnabled 2 +// The possible value of COMPlus_EventInfo should be a string in the following format: +// GUID:HexNumfer:Level +// GUID: For example e13c0d23-ccbc-4e12-931b-d9cc2eee27e4 (36 bytes) +// HewNumber: 0xffffffff (10 bytes) +// Level: 0~9 (1 bytes) +// Therefore the length of it should be 36 + 1 + 10 + 1 + 1 + 1 = 50 +#define SIZE_EventInfo 50 + +ULONG ETW::CEtwTracer::Register() +{ + // Get Env Var COMPlus_ETWEnabled + char szETWEnabled[SIZE_ETWEnabled]; + DWORD newLen = GetEnvironmentVariableA("COMPlus_ETWEnabled", szETWEnabled, SIZE_ETWEnabled); + if (newLen == 0 || newLen >= SIZE_ETWEnabled || strcmp(szETWEnabled, "1") != 0) + return 0; + + // Get Env Var COMPlus_EventInfo + char szEventInfo[SIZE_EventInfo]; + newLen = GetEnvironmentVariableA("COMPlus_EventInfo", szEventInfo, SIZE_EventInfo); + if (newLen == 0 || newLen >= SIZE_EventInfo || strchr(szEventInfo, ' ') != NULL) + return 0; + + // Get Env Var COMPlus_EventLogFileName + char szEventLogFN[_MAX_FNAME]; + newLen = GetEnvironmentVariableA("COMPlus_EventLogFileName", szEventLogFN, _MAX_FNAME); + if (newLen == 0 || newLen >= _MAX_FNAME || strchr(szEventLogFN, '|') != NULL) + return 0; + char szEventLogFullPath[_MAX_PATH]; + newLen = GetFullPathNameA(szEventLogFN, _MAX_PATH, szEventLogFullPath, NULL); + if (newLen == 0 || newLen > _MAX_PATH || strchr(szEventLogFN, '|') != NULL) + return 0; + + // Get the process id which is ued in dtrace to fire the probes of the process + int nProcessId = GetCurrentProcessId(); + + // Start the log (By calling an PAL API to connect to a Unix Domain Server) + PAL_StartLog(szEventInfo, szEventLogFullPath, nProcessId); + + return 0; +} + +INT32 WideCharToMultiByte(LPCWSTR wszSrcStr, LPSTR szDstStr) +{ + INT32 nSize = WideCharToMultiByte(CP_UTF8, 0, wszSrcStr, -1, NULL, 0, NULL, NULL); + if (0 == nSize) + { + return 0; + } + if (nSize > DTRACE_OUTPUT_STRING_LEN-1) + { + nSize = DTRACE_OUTPUT_STRING_LEN-1; + } + INT32 nSize2 = WideCharToMultiByte(CP_UTF8, 0, wszSrcStr, -1, szDstStr, nSize, NULL, NULL); + if(nSize2 != nSize || nSize2 <=0 ) + { + return 0; + } + return nSize; +} + +void EEConfigSetup_V1() +{ + FireEtwEEConfigSetup_V1(GetClrInstanceId()); +} + +void EEConfigSetupEnd_V1() +{ + FireEtwEEConfigSetupEnd_V1(GetClrInstanceId()); +} + +void LdSysBases_V1() +{ + FireEtwLdSysBases_V1(GetClrInstanceId()); +} + +void LdSysBasesEnd_V1() +{ + FireEtwLdSysBasesEnd_V1(GetClrInstanceId()); +} + +void ExecExe_V1() +{ + FireEtwExecExe_V1(GetClrInstanceId()); +} + +void ExecExeEnd_V1() +{ + FireEtwExecExeEnd_V1(GetClrInstanceId()); +} + +void Main_V1() +{ + FireEtwMain_V1(GetClrInstanceId()); +} + +void MainEnd_V1() +{ + FireEtwMainEnd_V1(GetClrInstanceId()); +} + + +void ApplyPolicyStart_V1() +{ + FireEtwApplyPolicyStart_V1(GetClrInstanceId()); +} + +void ApplyPolicyEnd_V1() +{ + FireEtwApplyPolicyEnd_V1(GetClrInstanceId()); +} + +void PrestubWorker_V1() +{ + FireEtwPrestubWorker_V1(GetClrInstanceId()); +} + +void PrestubWorkerEnd_V1() +{ + FireEtwPrestubWorkerEnd_V1(GetClrInstanceId()); +} + +void ExplicitBindStart_V1() +{ + FireEtwExplicitBindStart_V1(GetClrInstanceId()); +} + +void ExplicitBindEnd_V1() +{ + FireEtwExplicitBindEnd_V1(GetClrInstanceId()); +} + +void 
ParseXml_V1() +{ + FireEtwParseXml_V1(GetClrInstanceId()); +} + +void ParseXmlEnd_V1() +{ + FireEtwParseXmlEnd_V1(GetClrInstanceId()); +} + +void InitDefaultDomain_V1() +{ + FireEtwInitDefaultDomain_V1(GetClrInstanceId()); +} + +void InitDefaultDomainEnd_V1() +{ + FireEtwInitDefaultDomainEnd_V1(GetClrInstanceId()); +} +void AllowBindingRedirs_V1() +{ + FireEtwAllowBindingRedirs_V1(GetClrInstanceId()); +} + +void AllowBindingRedirsEnd_V1() +{ + FireEtwAllowBindingRedirsEnd_V1(GetClrInstanceId()); +} + +void EEConfigSync_V1() +{ + FireEtwEEConfigSync_V1(GetClrInstanceId()); +} + +void EEConfigSyncEnd_V1() +{ + FireEtwEEConfigSyncEnd_V1(GetClrInstanceId()); +} + +void FusionBinding_V1() +{ + FireEtwFusionBinding_V1(GetClrInstanceId()); +} + +void FusionBindingEnd_V1() +{ + FireEtwFusionBindingEnd_V1(GetClrInstanceId()); +} + +void LoaderCatchCall_V1() +{ + FireEtwLoaderCatchCall_V1(GetClrInstanceId()); +} + +void LoaderCatchCallEnd_V1() +{ + FireEtwLoaderCatchCallEnd_V1(GetClrInstanceId()); +} + +void FusionInit_V1() +{ + FireEtwFusionInit_V1(GetClrInstanceId()); +} + +void FusionInitEnd_V1() +{ + FireEtwFusionInitEnd_V1(GetClrInstanceId()); +} + +void FusionAppCtx_V1() +{ + FireEtwFusionAppCtx_V1(GetClrInstanceId()); +} + +void FusionAppCtxEnd_V1() +{ + FireEtwFusionAppCtxEnd_V1(GetClrInstanceId()); +} + +void SecurityCatchCall_V1() +{ + FireEtwSecurityCatchCall_V1(GetClrInstanceId()); +} + +void SecurityCatchCallEnd_V1() +{ + FireEtwSecurityCatchCallEnd_V1(GetClrInstanceId()); +} + + +#endif // !FEATURE_DTRACE + +#ifndef FEATURE_REDHAWK + +/****************************************************************************/ +/* This is called by the runtime when an exception is thrown */ +/****************************************************************************/ +void ETW::ExceptionLog::ExceptionThrown(CrawlFrame *pCf, BOOL bIsReThrownException, BOOL bIsNewException) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + PRECONDITION(GetThread() != NULL); + PRECONDITION(GetThread()->GetThrowable() != NULL); + } CONTRACTL_END; + + if(!(bIsReThrownException || bIsNewException)) + { + return; + } + if(!ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, ExceptionThrown_V1)) + { + return; + } + EX_TRY + { + SString exceptionType(L""); + LPWSTR exceptionMessage = NULL; + BOOL bIsCLSCompliant=FALSE, bIsCSE=FALSE, bIsNestedException=FALSE, bHasInnerException=FALSE; + UINT16 exceptionFlags=0; + PVOID exceptionEIP=0; + + Thread *pThread = GetThread(); + + struct + { + OBJECTREF exceptionObj; + OBJECTREF innerExceptionObj; + STRINGREF exceptionMessageRef; + } gc; + ZeroMemory(&gc, sizeof(gc)); + GCPROTECT_BEGIN(gc); + + gc.exceptionObj = pThread->GetThrowable(); + gc.innerExceptionObj = ((EXCEPTIONREF)gc.exceptionObj)->GetInnerException(); + + ThreadExceptionState *pExState = pThread->GetExceptionState(); +#ifndef WIN64EXCEPTIONS + PTR_ExInfo pExInfo = NULL; +#else + PTR_ExceptionTracker pExInfo = NULL; +#endif //!WIN64EXCEPTIONS + pExInfo = pExState->GetCurrentExceptionTracker(); + _ASSERTE(pExInfo != NULL); + bIsNestedException = (pExInfo->GetPreviousExceptionTracker() != NULL); + bIsCSE = (pExInfo->GetCorruptionSeverity() == ProcessCorrupting); + bIsCLSCompliant = IsException((gc.exceptionObj)->GetMethodTable()) && + ((gc.exceptionObj)->GetMethodTable() != MscorlibBinder::GetException(kRuntimeWrappedException)); + + // A rethrown exception is also a nested exception + // but since we have a separate flag for it, lets unset the nested flag + if(bIsReThrownException) + { + bIsNestedException = 
FALSE; + } + bHasInnerException = (gc.innerExceptionObj) != NULL; + + exceptionFlags = ((bHasInnerException ? ETW::ExceptionLog::ExceptionStructs::HasInnerException : 0) | + (bIsNestedException ? ETW::ExceptionLog::ExceptionStructs::IsNestedException : 0) | + (bIsReThrownException ? ETW::ExceptionLog::ExceptionStructs::IsReThrownException : 0) | + (bIsCSE ? ETW::ExceptionLog::ExceptionStructs::IsCSE : 0) | + (bIsCLSCompliant ? ETW::ExceptionLog::ExceptionStructs::IsCLSCompliant : 0)); + + if (pCf->IsFrameless()) + { +#ifndef _WIN64 + exceptionEIP = (PVOID)pCf->GetRegisterSet()->ControlPC; +#else + exceptionEIP = (PVOID)GetIP(pCf->GetRegisterSet()->pContext); +#endif //!_WIN64 + } + else + { + exceptionEIP = (PVOID)(pCf->GetFrame()->GetIP()); + } + + // On platforms other than IA64, we are at the instruction after the faulting instruction + // This check has been copied from StackTraceInfo::AppendElement + if (!(pCf->HasFaulted() || pCf->IsIPadjusted()) && exceptionEIP != 0) + { + exceptionEIP = (PVOID)((UINT_PTR)exceptionEIP - 1); + } + + gc.exceptionMessageRef = ((EXCEPTIONREF)gc.exceptionObj)->GetMessage(); + TypeHandle exceptionTypeHandle = (gc.exceptionObj)->GetTypeHandle(); + exceptionTypeHandle.GetName(exceptionType); + WCHAR *exceptionTypeName = (WCHAR *)exceptionType.GetUnicode(); + + if(gc.exceptionMessageRef != NULL) + { + exceptionMessage = (gc.exceptionMessageRef)->GetBuffer(); + } + + HRESULT exceptionHRESULT = ((EXCEPTIONREF)gc.exceptionObj)->GetHResult(); + + FireEtwExceptionThrown_V1(exceptionTypeName, + exceptionMessage, + exceptionEIP, + exceptionHRESULT, + exceptionFlags, + GetClrInstanceId()); + GCPROTECT_END(); + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/* This is called by the runtime when a domain is loaded */ +/****************************************************************************/ +void ETW::LoaderLog::DomainLoadReal(BaseDomain *pDomain, __in_opt LPWSTR wszFriendlyName) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_LOADER_KEYWORD)) + { + DWORD dwEventOptions = ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad; + ETW::LoaderLog::SendDomainEvent(pDomain, dwEventOptions, wszFriendlyName); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/* This is called by the runtime when an AppDomain is unloaded */ +/****************************************************************************/ +void ETW::LoaderLog::DomainUnload(AppDomain *pDomain) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + KEYWORDZERO)) + { + if(!pDomain->NoAccessToHandleTable()) + { + DWORD enumerationOptions = ETW::EnumerationLog::GetEnumerationOptionsFromRuntimeKeywords(); + + // Domain unload also causes type unload events + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::TypeUnload; + } + + ETW::EnumerationLog::EnumerationHelper(NULL, pDomain, enumerationOptions); + } + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + 
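+// The "exceptionEIP - 1" adjustment in ExceptionThrown above follows the usual stack-walking
+// convention: for a frame that did not fault, the recorded address is a return address (the
+// instruction after the call), so backing it up by one byte keeps the address inside the call
+// instruction and attributes the exception to the correct call site. A minimal sketch of that
+// rule, using a hypothetical helper name (illustration only, not part of this change):
+//
+//     static const void* AdjustIPForAttribution(const void* ip, bool hasFaulted, bool isIPAdjusted)
+//     {
+//         if (hasFaulted || isIPAdjusted || ip == NULL)
+//             return ip;                                // already points at the faulting instruction
+//         return reinterpret_cast<const char*>(ip) - 1; // step back into the call instruction
+//     }
+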
+/****************************************************************************/ +/* This is called by the runtime when a LoaderAllocator is unloaded */ +/****************************************************************************/ +void ETW::LoaderLog::CollectibleLoaderAllocatorUnload(AssemblyLoaderAllocator *pLoaderAllocator) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + KEYWORDZERO)) + { + DWORD enumerationOptions = ETW::EnumerationLog::GetEnumerationOptionsFromRuntimeKeywords(); + + // Collectible Loader Allocator unload also causes type unload events + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::TypeUnload; + } + + ETW::EnumerationLog::IterateCollectibleLoaderAllocator(pLoaderAllocator, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/* This is called by the runtime when the runtime is loaded + Function gets called by both the Callback mechanism and regular ETW events. + Type is used to differentiate whether its a callback or a normal call*/ +/****************************************************************************/ +void ETW::InfoLog::RuntimeInformation(INT32 type) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY { + if((type == ETW::InfoLog::InfoStructs::Normal && ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, RuntimeInformationStart)) +#ifndef FEATURE_PAL + || + (type == ETW::InfoLog::InfoStructs::Callback && ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, RuntimeInformationDCStart)) +#endif //!FEATURE_PAL + ) + { +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L"",szDtraceOutput2=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + UINT8 startupMode = 0; + UINT startupFlags = 0; + WCHAR dllPath[MAX_PATH+1] = {0}; + UINT8 Sku = 0; + _ASSERTE(g_fEEManagedEXEStartup || //CLR started due to a managed exe + g_fEEIJWStartup || //CLR started as a mixed mode Assembly + CLRHosted() || g_fEEHostedStartup || //CLR started through one of the Hosting API CLRHosted() returns true if CLR started through the V2 Interface while + // g_fEEHostedStartup is true if CLR is hosted through the V1 API. 
+ g_fEEComActivatedStartup || //CLR started as a COM object + g_fEEOtherStartup ); //In case none of the 4 above mentioned cases are true for example ngen, ildasm then we asssume its a "other" startup + +#ifdef FEATURE_CORECLR + Sku = ETW::InfoLog::InfoStructs::CoreCLR; +#else + Sku = ETW::InfoLog::InfoStructs::DesktopCLR; +#endif //FEATURE_CORECLR + + //version info for clr.dll + USHORT vmMajorVersion = VER_MAJORVERSION; + USHORT vmMinorVersion = VER_MINORVERSION; + USHORT vmBuildVersion = VER_PRODUCTBUILD; + USHORT vmQfeVersion = VER_PRODUCTBUILD_QFE; + + //version info for mscorlib.dll + USHORT bclMajorVersion = VER_ASSEMBLYMAJORVERSION; + USHORT bclMinorVersion = VER_ASSEMBLYMINORVERSION; + USHORT bclBuildVersion = VER_ASSEMBLYBUILD; + USHORT bclQfeVersion = VER_ASSEMBLYBUILD_QFE; + +#ifndef FEATURE_PAL + LPCGUID comGUID=g_fEEComObjectGuid; +#else + unsigned int comGUID=0; +#endif //!FEATURE_PAL + +#ifndef FEATURE_DTRACE + LPWSTR lpwszCommandLine = L""; + LPWSTR lpwszRuntimeDllPath = (LPWSTR)dllPath; +#else + SIZE_T lpwszCommandLine = (SIZE_T)szDtraceOutput1; + SIZE_T lpwszRuntimeDllPath = (SIZE_T)szDtraceOutput2; +#endif //!FEATURE_DTRACE + +#ifndef FEATURE_CORECLR + startupFlags = CorHost2::GetStartupFlags(); +#endif //!FEATURE_CORECLR + + // Determine the startupmode + if(g_fEEIJWStartup) + { + //IJW Mode + startupMode = ETW::InfoLog::InfoStructs::IJW; + } + else if(g_fEEManagedEXEStartup) + { + //managed exe + startupMode = ETW::InfoLog::InfoStructs::ManagedExe; +#ifndef FEATURE_DTRACE + lpwszCommandLine = WszGetCommandLine(); +#else + INT32 nSize = WideCharToMultiByte(WszGetCommandLine(), szDtraceOutput1); + if(nSize > 0) { + lpwszCommandLine = (SIZE_T)szDtraceOutput1; + } +#endif //!FEATURE_DTRACE + } + else if (CLRHosted() || g_fEEHostedStartup) + { + //Hosted CLR + startupMode = ETW::InfoLog::InfoStructs::HostedCLR; + } + else if(g_fEEComActivatedStartup) + { + //com activated + startupMode = ETW::InfoLog::InfoStructs::COMActivated; + } + else if(g_fEEOtherStartup) + { + //startup type is other + startupMode = ETW::InfoLog::InfoStructs::Other; + } + + _ASSERTE (NumItems(dllPath) > MAX_PATH); + // if WszGetModuleFileName fails, we return an empty string + if (!WszGetModuleFileName(GetCLRModule(), dllPath, MAX_PATH)) { + dllPath[0] = 0; + } + dllPath[MAX_PATH] = 0; +#ifdef FEATURE_DTRACE + _ASSERTE (NumItems(szDtraceOutput2) >= NumItems(dllPath)); + INT32 nSize = WideCharToMultiByte(dllPath, szDtraceOutput2); + if(nSize > 0) { + lpwszRuntimeDllPath = (SIZE_T)szDtraceOutput2; + } +#endif // FEATURE_DTRACE + + if(type == ETW::InfoLog::InfoStructs::Callback) + { + FireEtwRuntimeInformationDCStart( GetClrInstanceId(), + Sku, + bclMajorVersion, + bclMinorVersion, + bclBuildVersion, + bclQfeVersion, + vmMajorVersion, + vmMinorVersion, + vmBuildVersion, + vmQfeVersion, + startupFlags, + startupMode, + lpwszCommandLine, + comGUID, + lpwszRuntimeDllPath ); + } + else + { + FireEtwRuntimeInformationStart( GetClrInstanceId(), + Sku, + bclMajorVersion, + bclMinorVersion, + bclBuildVersion, + bclQfeVersion, + vmMajorVersion, + vmMinorVersion, + vmBuildVersion, + vmQfeVersion, + startupFlags, + startupMode, + lpwszCommandLine, + comGUID, + lpwszRuntimeDllPath ); + } + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/*******************************************************/ +/* This is called by the runtime when a method is jitted completely */ +/*******************************************************/ +void ETW::MethodLog::MethodJitted(MethodDesc *pMethodDesc, SString 
*namespaceOrClassName, SString *methodName, SString *methodSignature, SIZE_T pCode, ReJITID rejitID) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_JIT_KEYWORD)) + { + ETW::MethodLog::SendMethodEvent(pMethodDesc, ETW::EnumerationLog::EnumerationStructs::JitMethodLoad, TRUE, namespaceOrClassName, methodName, methodSignature, pCode, rejitID); + } +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_JITTEDMETHODILTONATIVEMAP_KEYWORD)) + { + // The call to SendMethodILToNativeMapEvent assumes that the debugger's lazy + // data has already been initialized. + + // g_pDebugInterface is initialized on startup on desktop CLR, regardless of whether a debugger + // or profiler is loaded. So it should always be available. + _ASSERTE(g_pDebugInterface != NULL); + g_pDebugInterface->InitializeLazyDataIfNecessary(); + + ETW::MethodLog::SendMethodILToNativeMapEvent(pMethodDesc, ETW::EnumerationLog::EnumerationStructs::JitMethodILToNativeMap, rejitID); + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/*************************************************/ +/* This is called by the runtime when method jitting started */ +/*************************************************/ +void ETW::MethodLog::MethodJitting(MethodDesc *pMethodDesc, SString *namespaceOrClassName, SString *methodName, SString *methodSignature) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + PRECONDITION(pMethodDesc != NULL); + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_VERBOSE, + CLR_JIT_KEYWORD)) + { + pMethodDesc->GetMethodInfo(*namespaceOrClassName, *methodName, *methodSignature); + ETW::MethodLog::SendMethodJitStartEvent(pMethodDesc, namespaceOrClassName, methodName, methodSignature); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/**********************************************************************/ +/* This is called by the runtime when a single jit helper method with stub is initialized */ +/**********************************************************************/ +void ETW::MethodLog::StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pHelperName) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + PRECONDITION(ullHelperStartAddress != 0); + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_JIT_KEYWORD)) + { + DWORD dwHelperSize=0; + Stub::RecoverStubAndSize((TADDR)ullHelperStartAddress, &dwHelperSize); + ETW::MethodLog::SendHelperEvent(ullHelperStartAddress, dwHelperSize, pHelperName); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/**********************************************************/ +/* This is called by the runtime when helpers with stubs are initialized */ +/**********************************************************/ +void ETW::MethodLog::StubsInitialized(PVOID *pHelperStartAddresss, PVOID *pHelperNames, LONG lNoOfHelpers) +{ + WRAPPER_NO_CONTRACT; + + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_JIT_KEYWORD)) + { + for(int i=0; iIsThunking()) +#endif + { + MethodTable::MethodIterator iter(pMethodTable); + for (; iter.IsValid(); iter.Next()) + { 
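+                // Walk the methods introduced by this MethodTable and send an NGen method-load
+                // event for each one that is restored; the GetMethodTable_NoLogging check below
+                // skips slots that belong to a base type.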
+ MethodDesc *pMD = (MethodDesc *)(iter.GetMethodDesc()); + if(pMD && pMD->IsRestored() && pMD->GetMethodTable_NoLogging() == pMethodTable) + ETW::MethodLog::SendMethodEvent(pMD, ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad, FALSE); + } + } + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + + +/****************************************************************************/ +/* This is called by the runtime when a Strong Name Verification Starts */ +/****************************************************************************/ +void ETW::SecurityLog::StrongNameVerificationStart(DWORD dwInFlags, __in LPWSTR strFullyQualifiedAssemblyName) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_CORECLR +#ifndef FEATURE_DTRACE + FireEtwStrongNameVerificationStart_V1(dwInFlags, 0, strFullyQualifiedAssemblyName, GetClrInstanceId()); +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSize = WideCharToMultiByte(strFullyQualifiedAssemblyName, szDtraceOutput1); + if (nSize != 0) + FireEtwStrongNameVerificationStart_V1(dwInFlags, 0, szDtraceOutput1, GetClrInstanceId()); +#endif +#endif // !FEATURE_CORECLR +} + + +/****************************************************************************/ +/* This is called by the runtime when a Strong Name Verification Ends */ +/****************************************************************************/ +void ETW::SecurityLog::StrongNameVerificationStop(DWORD dwInFlags,ULONG result, __in LPWSTR strFullyQualifiedAssemblyName) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_CORECLR +#ifndef FEATURE_DTRACE + FireEtwStrongNameVerificationStop_V1(dwInFlags, result, strFullyQualifiedAssemblyName, GetClrInstanceId()); +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSize = WideCharToMultiByte(strFullyQualifiedAssemblyName, szDtraceOutput1); + if (nSize != 0) + FireEtwStrongNameVerificationStop_V1(dwInFlags, result, szDtraceOutput1, GetClrInstanceId()); +#endif +#endif // !FEATURE_CORECLR +} + +/****************************************************************************/ +/* This is called by the runtime when field transparency calculations begin */ +/****************************************************************************/ +void ETW::SecurityLog::FireFieldTransparencyComputationStart(LPCWSTR wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwFieldTransparencyComputationStart(wszFieldName, wszModuleName, dwAppDomain, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeField = WideCharToMultiByte(wszFieldName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeField != 0 && nSizeModule != 0) + FireEtwFieldTransparencyComputationStart(szDtraceOutput1, szDtraceOutput2, dwAppDomain, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/****************************************************************************/ +/* This is called by the runtime when field transparency calculations end */ +/****************************************************************************/ +void ETW::SecurityLog::FireFieldTransparencyComputationEnd(LPCWSTR 
wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwFieldTransparencyComputationEnd(wszFieldName, wszModuleName, dwAppDomain, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeField = WideCharToMultiByte(wszFieldName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeField != 0 && nSizeModule != 0) + FireEtwFieldTransparencyComputationEnd(szDtraceOutput1, szDtraceOutput2, dwAppDomain, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/*****************************************************************************/ +/* This is called by the runtime when method transparency calculations begin */ +/*****************************************************************************/ +void ETW::SecurityLog::FireMethodTransparencyComputationStart(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwMethodTransparencyComputationStart(wszMethodName, wszModuleName, dwAppDomain, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeMethod = WideCharToMultiByte(wszMethodName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeMethod != 0 && nSizeModule != 0) + FireEtwMethodTransparencyComputationStart(szDtraceOutput1, szDtraceOutput2, dwAppDomain, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/*****************************************************************************/ +/* This is called by the runtime when method transparency calculations end */ +/********************************************(********************************/ +void ETW::SecurityLog::FireMethodTransparencyComputationEnd(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwMethodTransparencyComputationEnd(wszMethodName, wszModuleName, dwAppDomain, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeMethod = WideCharToMultiByte(wszMethodName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeMethod != 0 && nSizeModule != 0) + FireEtwMethodTransparencyComputationEnd(szDtraceOutput1, szDtraceOutput2, dwAppDomain, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/*****************************************************************************/ +/* This is called by the runtime when module transparency calculations begin */ +/*****************************************************************************/ +void ETW::SecurityLog::FireModuleTransparencyComputationStart(LPCWSTR wszModuleName, + DWORD dwAppDomain) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + 
FireEtwModuleTransparencyComputationStart(wszModuleName, dwAppDomain, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput1); + + if (nSizeModule != 0) + FireEtwModuleTransparencyComputationStart(szDtraceOutput1, dwAppDomain, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/****************************************************************************/ +/* This is called by the runtime when module transparency calculations end */ +/****************************************************************************/ +void ETW::SecurityLog::FireModuleTransparencyComputationEnd(LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsTreatAsSafe, + BOOL fIsOpportunisticallyCritical, + DWORD dwSecurityRuleSet) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwModuleTransparencyComputationEnd(wszModuleName, dwAppDomain, fIsAllCritical, fIsAllTransparent, fIsTreatAsSafe, fIsOpportunisticallyCritical, dwSecurityRuleSet, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput1); + + if (nSizeModule != 0) + FireEtwModuleTransparencyComputationEnd(szDtraceOutput1, dwAppDomain, fIsAllCritical, fIsAllTransparent, fIsTreatAsSafe, fIsOpportunisticallyCritical, dwSecurityRuleSet, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/****************************************************************************/ +/* This is called by the runtime when token transparency calculations begin */ +/****************************************************************************/ +void ETW::SecurityLog::FireTokenTransparencyComputationStart(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwTokenTransparencyComputationStart(dwToken, wszModuleName, dwAppDomain, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput1); + + if (nSizeModule != 0) + FireEtwTokenTransparencyComputationStart(dwToken, szDtraceOutput1, dwAppDomain, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/****************************************************************************/ +/* This is called by the runtime when token transparency calculations end */ +/****************************************************************************/ +void ETW::SecurityLog::FireTokenTransparencyComputationEnd(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwTokenTransparencyComputationEnd(dwToken, wszModuleName, dwAppDomain, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput1); + + if (nSizeModule != 0) + FireEtwTokenTransparencyComputationEnd(dwToken, szDtraceOutput1, dwAppDomain, 
fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/*****************************************************************************/ +/* This is called by the runtime when type transparency calculations begin */ +/*****************************************************************************/ +void ETW::SecurityLog::FireTypeTransparencyComputationStart(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwTypeTransparencyComputationStart(wszTypeName, wszModuleName, dwAppDomain, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeType = WideCharToMultiByte(wszTypeName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeType != 0 && nSizeModule != 0) + FireEtwTypeTransparencyComputationStart(szDtraceOutput1, szDtraceOutput2, dwAppDomain, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/****************************************************************************/ +/* This is called by the runtime when type transparency calculations end */ +/****************************************************************************/ +void ETW::SecurityLog::FireTypeTransparencyComputationEnd(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) +{ + WRAPPER_NO_CONTRACT; +#ifndef FEATURE_DTRACE + FireEtwTypeTransparencyComputationEnd(wszTypeName, wszModuleName, dwAppDomain, fIsAllCritical, fIsAllTransparent, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#else // FEATURE_DTRACE + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + // since DTrace does not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeType = WideCharToMultiByte(wszTypeName, szDtraceOutput1); + INT32 nSizeModule = WideCharToMultiByte(wszModuleName, szDtraceOutput2); + + if (nSizeType != 0 && nSizeModule != 0) + FireEtwTypeTransparencyComputationEnd(szDtraceOutput1, szDtraceOutput2, dwAppDomain, fIsAllCritical, fIsAllTransparent, fIsCritical, fIsTreatAsSafe, GetClrInstanceId()); +#endif // !FEATURE_DTRACE +} + +/**********************************************************************************/ +/* This is called by the runtime when a module is loaded */ +/* liReportedSharedModule will be 0 when this module is reported for the 1st time */ +/**********************************************************************************/ +void ETW::LoaderLog::ModuleLoad(Module *pModule, LONG liReportedSharedModule) +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + DWORD enumerationOptions = ETW::EnumerationLog::EnumerationStructs::None; + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + KEYWORDZERO)) + { + BOOL bTraceFlagLoaderSet = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_LOADER_KEYWORD); + BOOL bTraceFlagNgenMethodSet = IsRuntimeNgenKeywordEnabledAndNotSuppressed(); + BOOL bTraceFlagStartRundownSet = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_STARTENUMERATION_KEYWORD); + BOOL 
bTraceFlagPerfTrackSet = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_PERFTRACK_KEYWORD); + + if(liReportedSharedModule == 0) + { + + if(bTraceFlagLoaderSet) + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad; + if (bTraceFlagPerfTrackSet) + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoad; + if(bTraceFlagNgenMethodSet && bTraceFlagStartRundownSet) + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad; + + if(pModule->IsManifest() && bTraceFlagLoaderSet) + ETW::LoaderLog::SendAssemblyEvent(pModule->GetAssembly(), enumerationOptions); + + if(bTraceFlagLoaderSet || bTraceFlagPerfTrackSet) + ETW::LoaderLog::SendModuleEvent(pModule, ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad | ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoad); + + ETW::EnumerationLog::EnumerationHelper(pModule, NULL, enumerationOptions); + } + + // we want to report domainmodule events whenever they are loaded in any AppDomain + if(bTraceFlagLoaderSet) + ETW::LoaderLog::SendModuleEvent(pModule, ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad, TRUE); + } + +#if !defined(FEATURE_PAL) + { + BOOL bTraceFlagPerfTrackPrivateSet = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_PERFTRACK_PRIVATE_KEYWORD); + if (liReportedSharedModule == 0 && bTraceFlagPerfTrackPrivateSet) + { + enumerationOptions |= ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoadPrivate; + ETW::LoaderLog::SendModuleRange(pModule, enumerationOptions); + } + } +#endif + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/* This is called by the runtime when the process is being shutdown */ +/****************************************************************************/ +void ETW::EnumerationLog::ProcessShutdown() +{ + CONTRACTL { + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + + EX_TRY + { + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, TRACE_LEVEL_INFORMATION, KEYWORDZERO)) + { + DWORD enumerationOptions = GetEnumerationOptionsFromRuntimeKeywords(); + + // Send unload events for all remaining domains, including shared domain and + // default domain. 
+ ETW::EnumerationLog::EnumerationHelper(NULL /* module filter */, NULL /* domain filter */, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/****************************************************************************/ +/****************************************************************************/ +/* Begining of helper functions */ +/****************************************************************************/ +/****************************************************************************/ + +/****************************************************************************/ +/* This routine is used to send a domain load/unload or rundown event */ +/****************************************************************************/ +void ETW::LoaderLog::SendDomainEvent(BaseDomain *pBaseDomain, DWORD dwEventOptions, LPCWSTR wszFriendlyName) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + + if(!pBaseDomain) + return; + +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + BOOL bIsDefaultDomain = pBaseDomain->IsDefaultDomain(); + BOOL bIsAppDomain = pBaseDomain->IsAppDomain(); + BOOL bIsExecutable = bIsAppDomain ? !(pBaseDomain->AsAppDomain()->IsPassiveDomain()) : FALSE; + BOOL bIsSharedDomain = pBaseDomain->IsSharedDomain(); + UINT32 uSharingPolicy = bIsAppDomain?(pBaseDomain->AsAppDomain()->GetSharePolicy()):0; + + ULONGLONG ullDomainId = (ULONGLONG)pBaseDomain; + ULONG ulDomainFlags = ((bIsDefaultDomain ? ETW::LoaderLog::LoaderStructs::DefaultDomain : 0) | + (bIsExecutable ? ETW::LoaderLog::LoaderStructs::ExecutableDomain : 0) | + (bIsSharedDomain ? ETW::LoaderLog::LoaderStructs::SharedDomain : 0) | + (uSharingPolicy<<28)); + + LPCWSTR wsEmptyString = L""; + LPCWSTR wsSharedString = L"SharedDomain"; + + LPWSTR lpswzDomainName = (LPWSTR)wsEmptyString; + + if(bIsAppDomain) + { + if(wszFriendlyName) + lpswzDomainName = (PWCHAR)wszFriendlyName; + else + lpswzDomainName = (PWCHAR)pBaseDomain->AsAppDomain()->GetFriendlyName(); + } + else + lpswzDomainName = (LPWSTR)wsSharedString; + + /* prepare events args for ETW and ETM */ +#ifndef FEATURE_DTRACE + szDtraceOutput1 = (PCWSTR)lpswzDomainName; +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSize = WideCharToMultiByte(lpswzDomainName, szDtraceOutput1); + if (nSize == 0) + return; +#endif // !FEATURE_DTRACE + + if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) + { + FireEtwAppDomainLoad_V1(ullDomainId, ulDomainFlags, szDtraceOutput1, pBaseDomain->GetId().m_dwId, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) + { + FireEtwAppDomainUnload_V1(ullDomainId, ulDomainFlags, szDtraceOutput1, pBaseDomain->GetId().m_dwId, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + FireEtwAppDomainDCStart_V1(ullDomainId, ulDomainFlags, szDtraceOutput1, pBaseDomain->GetId().m_dwId, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) + { + FireEtwAppDomainDCEnd_V1(ullDomainId, ulDomainFlags, szDtraceOutput1, pBaseDomain->GetId().m_dwId, GetClrInstanceId()); + } + else + { + _ASSERTE((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) || + (dwEventOptions & 
ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd)); + } +} + +/********************************************************/ +/* This routine is used to send thread rundown events when ARM is enabled */ +/********************************************************/ +void ETW::EnumerationLog::SendThreadRundownEvent() +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + +#ifndef DACCESS_COMPILE + Thread *pThread = NULL; + + // Take the thread store lock while we enumerate threads. + ThreadStoreLockHolder tsl; + while ((pThread = ThreadStore::GetThreadList(pThread)) != NULL) + { + if (pThread->IsUnstarted() || pThread->IsDead()) + continue; + + // Send thread rundown provider events and thread created runtime provider + // events (depending on which are enabled) + ThreadLog::FireThreadDC(pThread); + ThreadLog::FireThreadCreated(pThread); + } +#endif // !DACCESS_COMPILE +} + +/****************************************************************************/ +/* This routine is used to send an assembly load/unload or rundown event ****/ +/****************************************************************************/ +void ETW::LoaderLog::SendAssemblyEvent(Assembly *pAssembly, DWORD dwEventOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + + if(!pAssembly) + return; + +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + BOOL bIsDynamicAssembly = pAssembly->IsDynamic(); + BOOL bIsCollectibleAssembly = pAssembly->IsCollectible(); + BOOL bIsDomainNeutral = pAssembly->IsDomainNeutral() ; + BOOL bHasNativeImage = pAssembly->GetManifestFile()->HasNativeImage(); + + ULONGLONG ullAssemblyId = (ULONGLONG)pAssembly; + ULONGLONG ullDomainId = (ULONGLONG)pAssembly->GetDomain(); + ULONGLONG ullBindingID = 0; +#if (defined FEATURE_PREJIT) && (defined FEATURE_FUSION_DEPRECATE) + ullBindingID = pAssembly->GetManifestFile()->GetBindingID(); +#endif + ULONG ulAssemblyFlags = ((bIsDomainNeutral ? ETW::LoaderLog::LoaderStructs::DomainNeutralAssembly : 0) | + (bIsDynamicAssembly ? ETW::LoaderLog::LoaderStructs::DynamicAssembly : 0) | + (bHasNativeImage ? ETW::LoaderLog::LoaderStructs::NativeAssembly : 0) | + (bIsCollectibleAssembly ? 
ETW::LoaderLog::LoaderStructs::CollectibleAssembly : 0)); + + SString sAssemblyPath; + pAssembly->GetDisplayName(sAssemblyPath); + LPWSTR lpszAssemblyPath = (LPWSTR)sAssemblyPath.GetUnicode(); + +/* prepare events args for ETW and ETM */ +#ifndef FEATURE_DTRACE + szDtraceOutput1 = (PCWSTR)lpszAssemblyPath; +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSize = WideCharToMultiByte(lpszAssemblyPath, szDtraceOutput1); + if (nSize == 0) + return; +#endif // !FEATURE_DTRACE + + if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) + { + FireEtwAssemblyLoad_V1(ullAssemblyId, ullDomainId, ullBindingID, ulAssemblyFlags, szDtraceOutput1, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) + { + FireEtwAssemblyUnload_V1(ullAssemblyId, ullDomainId, ullBindingID, ulAssemblyFlags, szDtraceOutput1, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + FireEtwAssemblyDCStart_V1(ullAssemblyId, ullDomainId, ullBindingID, ulAssemblyFlags, szDtraceOutput1, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) + { + FireEtwAssemblyDCEnd_V1(ullAssemblyId, ullDomainId, ullBindingID, ulAssemblyFlags, szDtraceOutput1, GetClrInstanceId()); + } + else + { + _ASSERTE((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd)); + } +} + +#if !defined(FEATURE_PAL) +ETW_INLINE + ULONG + ETW::LoaderLog::SendModuleRange( + __in Module *pModule, + __in DWORD dwEventOptions) + +{ + ULONG Result = ERROR_SUCCESS; + + + // do not fire the ETW event when: + // 1. We did not load the native image + // 2. 
We do not have IBC data for the native image + if( !pModule || !pModule->HasNativeImage() || !pModule->IsIbcOptimized() ) + { + return Result; + } + + // get information about the hot sections from the native image that has been loaded + COUNT_T cbSizeOfSectionTable; + CORCOMPILE_VIRTUAL_SECTION_INFO* pVirtualSectionsTable = (CORCOMPILE_VIRTUAL_SECTION_INFO* )pModule->GetNativeImage()->GetVirtualSectionsTable(&cbSizeOfSectionTable); + + COUNT_T RangeCount = cbSizeOfSectionTable/sizeof(CORCOMPILE_VIRTUAL_SECTION_INFO); + + // if we do not have any hot ranges, we do not fire the ETW event + + // Figure out the rest of the event data + UINT16 ClrInstanceId = GetClrInstanceId(); + UINT64 ModuleID = (ULONGLONG)(TADDR) pModule; + + for (COUNT_T i = 0; i < RangeCount; ++i) + { + DWORD rangeBegin = pVirtualSectionsTable[i].VirtualAddress; + DWORD rangeSize = pVirtualSectionsTable[i].Size; + DWORD sectionType = pVirtualSectionsTable[i].SectionType; + + UINT8 ibcType = VirtualSectionData::IBCType(sectionType); + UINT8 rangeType = VirtualSectionData::RangeType(sectionType); + UINT16 virtualSectionType = VirtualSectionData::VirtualSectionType(sectionType); + BOOL isIBCProfiledColdSection = VirtualSectionData::IsIBCProfiledColdSection(sectionType); + if (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoad) + { + if (isIBCProfiledColdSection) + Result &= FireEtwModuleRangeLoad(ClrInstanceId, ModuleID, rangeBegin, rangeSize, rangeType); + } + else if (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCStart) + { + if (isIBCProfiledColdSection) + Result &= FireEtwModuleRangeDCStart(ClrInstanceId, ModuleID, rangeBegin, rangeSize, rangeType); + } + else if (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCEnd) + { + if (isIBCProfiledColdSection) + Result &= FireEtwModuleRangeDCEnd(ClrInstanceId, ModuleID, rangeBegin, rangeSize, rangeType); + } + // Fire private events if they are requested. + if (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoadPrivate) + { + Result &= FireEtwModuleRangeLoadPrivate(ClrInstanceId, ModuleID, rangeBegin, rangeSize, rangeType, ibcType, virtualSectionType); + } + } + return Result; +} +#endif // !FEATURE_PAL + +#ifndef FEATURE_DTRACE +//--------------------------------------------------------------------------------------- +// +// Helper that takes a module, and returns the managed and native PDB information +// corresponding to that module. Used by the routine that fires the module load / unload +// events. +// +// Arguments: +// * pModule - Module to examine +// * pCvInfoIL - [out] CV_INFO_PDB70 corresponding to managed PDB for this module +// (the last debug directory entry in the PE File), if it exists. If it doesn't +// exist, this is zeroed out. +// * pCvInfoNative - [out] CV_INFO_PDB70 corresponding to native NGEN PDB for this +// module (the next-to-last debug directory entry in the PE File), if it exists. +// If it doesn't exist, this is zeroed out. +// +// Notes: +// * This method only understands the CV_INFO_PDB70 / RSDS format. If the format +// changes, this function will act as if there are no debug directory entries. +// Module load / unload events will still be fired, but all PDB info will be +// zeroed out. +// * The raw data in the PE file's debug directory entries are assumed to be +// untrusted, and reported sizes of buffers are verified against their data. 
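+//     * The parsing below assumes the standard RSDS record layout for CV_INFO_PDB70: a DWORD
+//       magic (the bytes "RSDS"), a GUID signature, a DWORD age, and a variable-length,
+//       NUL-terminated path.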
+// + +static void GetCodeViewInfo(Module * pModule, CV_INFO_PDB70 * pCvInfoIL, CV_INFO_PDB70 * pCvInfoNative) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE (pModule != NULL); + _ASSERTE (pCvInfoIL != NULL); + _ASSERTE (pCvInfoNative != NULL); + + ZeroMemory(pCvInfoIL, sizeof(*pCvInfoIL)); + ZeroMemory(pCvInfoNative, sizeof(*pCvInfoNative)); + + PTR_PEFile pPEFile = pModule->GetFile(); + _ASSERTE(pPEFile != NULL); + + PTR_PEImageLayout pLayout = NULL; + if (pPEFile->HasNativeImage()) + { + pLayout = pPEFile->GetLoadedNative(); + } + else if (pPEFile->HasOpenedILimage()) + { + pLayout = pPEFile->GetLoadedIL(); + } + + if (pLayout == NULL) + { + // This can happen for reflection-loaded modules + return; + } + + if (!pLayout->HasNTHeaders()) + { + // Without NT headers, we'll have a tough time finding the debug directory + // entries. This can happen for nlp files. + return; + } + + if (!pLayout->HasDirectoryEntry(IMAGE_DIRECTORY_ENTRY_DEBUG)) + return; + + COUNT_T cbDebugEntries; + IMAGE_DEBUG_DIRECTORY * rgDebugEntries = + (IMAGE_DEBUG_DIRECTORY *) pLayout->GetDirectoryEntryData(IMAGE_DIRECTORY_ENTRY_DEBUG, &cbDebugEntries); + + if (cbDebugEntries < sizeof(IMAGE_DEBUG_DIRECTORY)) + return; + + // Since rgDebugEntries is an array of IMAGE_DEBUG_DIRECTORYs, cbDebugEntries + // should be a multiple of sizeof(IMAGE_DEBUG_DIRECTORY). + if (cbDebugEntries % sizeof(IMAGE_DEBUG_DIRECTORY) != 0) + return; + + // Temporary storage for a CV_INFO_PDB70 and its size (which could be less than + // sizeof(CV_INFO_PDB70); see below). + struct PdbInfo + { + CV_INFO_PDB70 * m_pPdb70; + ULONG m_cbPdb70; + }; + + // Iterate through all debug directory entries. The very last one will be the + // managed PDB entry. The next to last one (if it exists) will be the (native) NGEN + // PDB entry. Treat raw bytes we read as untrusted. + PdbInfo pdbInfoLast = {0}; + PdbInfo pdbInfoNextToLast = {0}; + int cEntries = cbDebugEntries / sizeof(IMAGE_DEBUG_DIRECTORY); + for (int i = 0; i < cEntries; i++) + { + if (rgDebugEntries[i].Type != IMAGE_DEBUG_TYPE_CODEVIEW) + continue; + + // Get raw data pointed to by this IMAGE_DEBUG_DIRECTORY + + // Some compilers set PointerToRawData but not AddressOfRawData as they put the + // data at the end of the file in an unmapped part of the file + RVA rvaOfRawData = (rgDebugEntries[i].AddressOfRawData != NULL) ? + rgDebugEntries[i].AddressOfRawData : + pLayout->OffsetToRva(rgDebugEntries[i].PointerToRawData); + + ULONG cbDebugData = rgDebugEntries[i].SizeOfData; + if (cbDebugData < (offsetof(CV_INFO_PDB70, magic) + sizeof(((CV_INFO_PDB70*)0)->magic))) + { + // raw data too small to contain magic number at expected spot, so its format + // is not recognizeable. Skip + continue; + } + + if (!pLayout->CheckRva(rvaOfRawData, cbDebugData)) + { + // Memory claimed to belong to the raw data does not fit. + // IMAGE_DEBUG_DIRECTORY is outright corrupt. Do not include PDB info in + // event at all. + return; + } + + // Verify the magic number is as expected + CV_INFO_PDB70 * pPdb70 = (CV_INFO_PDB70 *) pLayout->GetRvaData(rvaOfRawData); + if (pPdb70->magic != CV_SIGNATURE_RSDS) + { + // Unrecognized magic number. Skip + continue; + } + + // From this point forward, the format should adhere to the expected layout of + // CV_INFO_PDB70. If we find otherwise, then assume the IMAGE_DEBUG_DIRECTORY is + // outright corrupt, and do not include PDB info in event at all. The caller will + // still fire the module event, but have zeroed-out / empty PDB fields. 
+ + // Verify sane size of raw data + if (cbDebugData > sizeof(CV_INFO_PDB70)) + return; + + // cbDebugData actually can be < sizeof(CV_INFO_PDB70), since the "path" field + // can be truncated to its actual data length (i.e., fewer than MAX_PATH chars + // may be present in the PE file). In some cases, though, cbDebugData will + // include all MAX_PATH chars even though path gets null-terminated well before + // the MAX_PATH limit. + + // Gotta have at least one byte of the path + if (cbDebugData < offsetof(CV_INFO_PDB70, path) + sizeof(char)) + return; + + // How much space is available for the path? + size_t cchPathMaxIncludingNullTerminator = (cbDebugData - offsetof(CV_INFO_PDB70, path)) / sizeof(char); + _ASSERTE(cchPathMaxIncludingNullTerminator >= 1); // Guaranteed above + + // Verify path string fits inside the declared size + size_t cchPathActualExcludingNullTerminator = strnlen(pPdb70->path, cchPathMaxIncludingNullTerminator); + if (cchPathActualExcludingNullTerminator == cchPathMaxIncludingNullTerminator) + { + // This is how strnlen indicates failure--it couldn't find the null + // terminator within the buffer size specified + return; + } + + // Looks valid. Remember it. + pdbInfoNextToLast = pdbInfoLast; + pdbInfoLast.m_pPdb70 = pPdb70; + pdbInfoLast.m_cbPdb70 = cbDebugData; + } + + // Return whatever we found + + if (pdbInfoLast.m_pPdb70 != NULL) + { + // The last guy is the IL (managed) PDB info + _ASSERTE(pdbInfoLast.m_cbPdb70 <= sizeof(*pCvInfoIL)); // Guaranteed by checks above + memcpy(pCvInfoIL, pdbInfoLast.m_pPdb70, pdbInfoLast.m_cbPdb70); + } + + if (pdbInfoNextToLast.m_pPdb70 != NULL) + { + // The next-to-last guy is the NGEN (native) PDB info + _ASSERTE(pdbInfoNextToLast.m_cbPdb70 <= sizeof(*pCvInfoNative)); // Guaranteed by checks above + memcpy(pCvInfoNative, pdbInfoNextToLast.m_pPdb70, pdbInfoNextToLast.m_cbPdb70); + } +} +#endif // FEATURE_DTRACE + + + +//--------------------------------------------------------------------------------------- +// +// send a module load/unload or rundown event and domainmodule load and rundown event +// +// Arguments: +// * pModule - Module loading or unloading +// * dwEventOptions - Bitmask of which events to fire +// * bFireDomainModuleEvents - nonzero if we are to fire DomainModule events; zero +// if we are to fire Module events +// +void ETW::LoaderLog::SendModuleEvent(Module *pModule, DWORD dwEventOptions, BOOL bFireDomainModuleEvents) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + + if(!pModule) + return; + +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L"",szDtraceOutput2=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + BOOL bIsDynamicAssembly = pModule->GetAssembly()->IsDynamic(); + BOOL bHasNativeImage = FALSE; +#ifdef FEATURE_PREJIT + bHasNativeImage = pModule->HasNativeImage(); +#endif // FEATURE_PREJIT + BOOL bIsManifestModule = pModule->IsManifest(); + ULONGLONG ullAppDomainId = 0; // This is used only with DomainModule events + ULONGLONG ullModuleId = (ULONGLONG)(TADDR) pModule; + ULONGLONG ullAssemblyId = (ULONGLONG)pModule->GetAssembly(); + BOOL bIsDomainNeutral = pModule->GetAssembly()->IsDomainNeutral(); + BOOL bIsIbcOptimized = FALSE; + if(bHasNativeImage) + { + bIsIbcOptimized = pModule->IsIbcOptimized(); + } + ULONG ulReservedFlags = 0; + ULONG ulFlags = ((bIsDomainNeutral ? ETW::LoaderLog::LoaderStructs::DomainNeutralModule : 0) | + (bHasNativeImage ? 
ETW::LoaderLog::LoaderStructs::NativeModule : 0) | + (bIsDynamicAssembly ? ETW::LoaderLog::LoaderStructs::DynamicModule : 0) | + (bIsManifestModule ? ETW::LoaderLog::LoaderStructs::ManifestModule : 0) | + (bIsIbcOptimized ? ETW::LoaderLog::LoaderStructs::IbcOptimized : 0)); + +#ifndef FEATURE_DTRACE + // Grab PDB path, guid, and age for managed PDB and native (NGEN) PDB when + // available. Any failures are not fatal. The corresponding PDB info will remain + // zeroed out, and that's what we'll include in the event. + CV_INFO_PDB70 cvInfoIL = {0}; + CV_INFO_PDB70 cvInfoNative = {0}; + GetCodeViewInfo(pModule, &cvInfoIL, &cvInfoNative); +#endif // FEATURE_DTRACE + + PWCHAR ModuleILPath=L"", ModuleNativePath=L""; + + if(bFireDomainModuleEvents) + { + if(pModule->GetDomain()->IsSharedDomain()) // for shared domains, we do not fire domainmodule event + return; + ullAppDomainId = (ULONGLONG)pModule->FindDomainAssembly(pModule->GetDomain()->AsAppDomain())->GetAppDomain(); + } + + LPCWSTR pEmptyString = L""; +#ifndef FEATURE_PAL + SString moduleName = L""; +#else // !FEATURE_PAL + SString moduleName; +#endif // !FEATURE_PAL + if(!bIsDynamicAssembly) + { + ModuleILPath = (PWCHAR)pModule->GetAssembly()->GetManifestFile()->GetILimage()->GetPath().GetUnicode(); + ModuleNativePath = (PWCHAR)pEmptyString; + +#ifdef FEATURE_PREJIT + if(bHasNativeImage) + ModuleNativePath = (PWCHAR)pModule->GetNativeImage()->GetPath().GetUnicode(); +#endif // FEATURE_PREJIT + } + + // if we do not have a module path yet, we put the module name + if(bIsDynamicAssembly || ModuleILPath==NULL || wcslen(ModuleILPath) <= 2) + { + moduleName.SetUTF8(pModule->GetSimpleName()); + ModuleILPath = (PWCHAR)moduleName.GetUnicode(); + ModuleNativePath = (PWCHAR)pEmptyString; + } + + /* prepare events args for ETW and ETM */ +#ifndef FEATURE_DTRACE + szDtraceOutput1 = (PCWSTR)ModuleILPath; + szDtraceOutput2 = (PCWSTR)ModuleNativePath; + + // Convert PDB paths to UNICODE + StackSString managedPdbPath(SString::Utf8, cvInfoIL.path); + StackSString nativePdbPath(SString::Utf8, cvInfoNative.path); +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeOfILPath = WideCharToMultiByte(ModuleILPath, szDtraceOutput1); + if (nSizeOfILPath == 0) + return; + INT32 nSizeOfNativePath = WideCharToMultiByte(ModuleNativePath, szDtraceOutput2); + if (nSizeOfNativePath == 0) + return; +#endif // !FEATURE_DTRACE + + if(bFireDomainModuleEvents) + { + if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) + { + FireEtwDomainModuleLoad_V1(ullModuleId, ullAssemblyId, ullAppDomainId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + FireEtwDomainModuleDCStart_V1(ullModuleId, ullAssemblyId, ullAppDomainId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId()); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) + { + FireEtwDomainModuleDCEnd_V1(ullModuleId, ullAssemblyId, ullAppDomainId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId()); + } + else + { + _ASSERTE((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || + (dwEventOptions & 
ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd)); + } + } + else + { + if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) || (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoad)) + { + FireEtwModuleLoad_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId(), &cvInfoIL.signature, cvInfoIL.age, managedPdbPath, &cvInfoNative.signature, cvInfoNative.age, nativePdbPath); + } + else if(dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) + { + FireEtwModuleUnload_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId(), &cvInfoIL.signature, cvInfoIL.age, managedPdbPath, &cvInfoNative.signature, cvInfoNative.age, nativePdbPath); + } + else if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCStart)) + { + FireEtwModuleDCStart_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId(), &cvInfoIL.signature, cvInfoIL.age, managedPdbPath, &cvInfoNative.signature, cvInfoNative.age, nativePdbPath); + } + else if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCEnd)) + { + FireEtwModuleDCEnd_V1_or_V2(ullModuleId, ullAssemblyId, ulFlags, ulReservedFlags, szDtraceOutput1, szDtraceOutput2, GetClrInstanceId(), &cvInfoIL.signature, cvInfoIL.age, managedPdbPath, &cvInfoNative.signature, cvInfoNative.age, nativePdbPath); + } + else + { + _ASSERTE((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeEnabledAny)); + + } +#if !defined(FEATURE_PAL) + if (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeEnabledAny) + { + // Fire ModuleRangeLoad, ModuleRangeDCStart, ModuleRangeDCEnd or ModuleRangeLoadPrivate event for this Module + SendModuleRange(pModule, dwEventOptions); + } +#endif + } +} + +/*****************************************************************/ +/* This routine is used to send an ETW event just before a method starts jitting*/ +/*****************************************************************/ +void ETW::MethodLog::SendMethodJitStartEvent(MethodDesc *pMethodDesc, SString *namespaceOrClassName, SString *methodName, SString *methodSignature) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + + Module *pModule = NULL; + Module *pLoaderModule = NULL; // This must not be used except for getting the ModuleID + + ULONGLONG ullMethodIdentifier=0; + ULONGLONG ullModuleID=0; + ULONG ulMethodToken=0; + ULONG ulMethodILSize=0; +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L"",szDtraceOutput2=L"",szDtraceOutput3=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput3[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + + if(pMethodDesc) { + pModule = pMethodDesc->GetModule_NoLogging(); + + 
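+        // Unrestored NGEN methods are skipped here; as noted in SendMethodEvent, forcibly
+        // restoring them can cause deadlocks and contract violations.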
if(!pMethodDesc->IsRestored()) { + return; + } + + bool bIsDynamicMethod = pMethodDesc->IsDynamicMethod(); + BOOL bIsGenericMethod = FALSE; + if(pMethodDesc->GetMethodTable_NoLogging()) + bIsGenericMethod = pMethodDesc->HasClassOrMethodInstantiation_NoLogging(); + + ullModuleID = (ULONGLONG)(TADDR) pModule; + ullMethodIdentifier = (ULONGLONG)pMethodDesc; + + // Use MethodDesc if Dynamic or Generic methods + if( bIsDynamicMethod || bIsGenericMethod) + { + if(bIsGenericMethod) + ulMethodToken = (ULONG)pMethodDesc->GetMemberDef_NoLogging(); + if(bIsDynamicMethod) // if its a generic and a dynamic method, we would set the methodtoken to 0 + ulMethodToken = (ULONG)0; + } + else + ulMethodToken = (ULONG)pMethodDesc->GetMemberDef_NoLogging(); + + if(pMethodDesc->IsIL()) + { + COR_ILMETHOD_DECODER::DecoderStatus decoderstatus = COR_ILMETHOD_DECODER::FORMAT_ERROR; + COR_ILMETHOD_DECODER ILHeader(pMethodDesc->GetILHeader(), pMethodDesc->GetMDImport(), &decoderstatus); + ulMethodILSize = (ULONG)ILHeader.GetCodeSize(); + } + + SString tNamespace, tMethodName, tMethodSignature; + if(!namespaceOrClassName|| !methodName|| !methodSignature || (methodName->IsEmpty() && namespaceOrClassName->IsEmpty() && methodSignature->IsEmpty())) + { + pMethodDesc->GetMethodInfo(tNamespace, tMethodName, tMethodSignature); + namespaceOrClassName = &tNamespace; + methodName = &tMethodName; + methodSignature = &tMethodSignature; + } + + // fire method information + /* prepare events args for ETW and ETM */ +#ifndef FEATURE_DTRACE + szDtraceOutput1 = (PCWSTR)namespaceOrClassName->GetUnicode(); + szDtraceOutput2 = (PCWSTR)methodName->GetUnicode(); + szDtraceOutput3 = (PCWSTR)methodSignature->GetUnicode(); +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeOfNamespaceOrClassName = WideCharToMultiByte((PCWSTR)namespaceOrClassName->GetUnicode(), szDtraceOutput1); + if (nSizeOfNamespaceOrClassName == 0) + return; + INT32 nSizeOfMethodName = WideCharToMultiByte((PCWSTR)methodName->GetUnicode(), szDtraceOutput2); + if (nSizeOfMethodName == 0) + return; + INT32 nSizeMethodsignature = WideCharToMultiByte((PCWSTR)methodSignature->GetUnicode(), szDtraceOutput3); + if (nSizeMethodsignature == 0) + return; +#endif // !FEATURE_DTRACE + + FireEtwMethodJittingStarted_V1(ullMethodIdentifier, + ullModuleID, + ulMethodToken, + ulMethodILSize, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId()); + } +} + +/****************************************************************************/ +/* This routine is used to send a method load/unload or rundown event */ +/****************************************************************************/ +void ETW::MethodLog::SendMethodEvent(MethodDesc *pMethodDesc, DWORD dwEventOptions, BOOL bIsJit, SString *namespaceOrClassName, SString *methodName, SString *methodSignature, SIZE_T pCode, ReJITID rejitID) +{ + CONTRACTL { + THROWS; + GC_NOTRIGGER; + SO_NOT_MAINLINE; + } CONTRACTL_END; + + Module *pModule = NULL; + Module *pLoaderModule = NULL; // This must not be used except for getting the ModuleID + ULONGLONG ullMethodStartAddress=0, ullColdMethodStartAddress=0, ullModuleID=0, ullMethodIdentifier=0; + ULONG ulMethodSize=0, ulColdMethodSize=0, ulMethodToken=0, ulMethodFlags=0, ulColdMethodFlags=0; + PWCHAR pMethodName=NULL, pNamespaceName=NULL, pMethodSignature=NULL; + BOOL bHasNativeImage = FALSE, bShowVerboseOutput = FALSE, bIsDynamicMethod = FALSE, bHasSharedGenericCode = FALSE, bIsGenericMethod = FALSE; 
+#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L"",szDtraceOutput2=L"",szDtraceOutput3=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput2[DTRACE_OUTPUT_STRING_LEN]; + CHAR szDtraceOutput3[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + + BOOL bIsRundownProvider = ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCEnd) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCEnd)); + + BOOL bIsRuntimeProvider = ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodUnload)); + + if (pMethodDesc == NULL) + return; + + if(!pMethodDesc->IsRestored()) + { + // Forcibly restoring ngen methods can cause all sorts of deadlocks and contract violations + // These events are therefore put under the private provider + if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_PRIVATENGENFORCERESTORE_KEYWORD)) + { + PERMANENT_CONTRACT_VIOLATION(GCViolation, ReasonNonShippingCode); + pMethodDesc->CheckRestore(); + } + else + { + return; + } + } + + + if(bIsRundownProvider) + { + bShowVerboseOutput = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_Context, + TRACE_LEVEL_VERBOSE, + KEYWORDZERO); + } + else if(bIsRuntimeProvider) + { + bShowVerboseOutput = ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_VERBOSE, + KEYWORDZERO); + } + + pModule = pMethodDesc->GetModule_NoLogging(); +#ifdef FEATURE_PREJIT + bHasNativeImage = pModule->HasNativeImage(); +#endif // FEATURE_PREJIT + bIsDynamicMethod = (BOOL)pMethodDesc->IsDynamicMethod(); + bHasSharedGenericCode = pMethodDesc->IsSharedByGenericInstantiations(); + + if(pMethodDesc->GetMethodTable_NoLogging()) + bIsGenericMethod = pMethodDesc->HasClassOrMethodInstantiation_NoLogging(); + + ulMethodFlags = ((ulMethodFlags | + (bHasSharedGenericCode ? ETW::MethodLog::MethodStructs::SharedGenericCode : 0) | + (bIsGenericMethod ? ETW::MethodLog::MethodStructs::GenericMethod : 0) | + (bIsDynamicMethod ? ETW::MethodLog::MethodStructs::DynamicMethod : 0) | + (bIsJit ? ETW::MethodLog::MethodStructs::JittedMethod : 0))); + + // Intentionally set the extent flags (cold vs. hot) only after all the other common + // flags (above) have been set. + ulColdMethodFlags = ulMethodFlags | ETW::MethodLog::MethodStructs::ColdSection; // Method Extent (bits 28, 29, 30, 31) + ulMethodFlags = ulMethodFlags | ETW::MethodLog::MethodStructs::HotSection; // Method Extent (bits 28, 29, 30, 31) + + // MethodDesc ==> Code Address ==>JitMananger + TADDR start = pCode ? 
pCode : PCODEToPINSTR(pMethodDesc->GetNativeCode()); + if(start == 0) { + // this method hasn't been jitted + return; + } + + // EECodeInfo is technically initialized by a "PCODE", but it can also be initialized + // by a TADDR (i.e., w/out thumb bit set on ARM) + EECodeInfo codeInfo(start); + + // MethodToken ==> MethodRegionInfo + IJitManager::MethodRegionInfo methodRegionInfo; + codeInfo.GetMethodRegionInfo(&methodRegionInfo); + + ullMethodStartAddress = (ULONGLONG)methodRegionInfo.hotStartAddress; + ulMethodSize = (ULONG)methodRegionInfo.hotSize; + + ullModuleID = (ULONGLONG)(TADDR) pModule; + ullMethodIdentifier = (ULONGLONG)pMethodDesc; + + // Use MethodDesc if Dynamic or Generic methods + if( bIsDynamicMethod || bIsGenericMethod) + { + bShowVerboseOutput = TRUE; + if(bIsGenericMethod) + ulMethodToken = (ULONG)pMethodDesc->GetMemberDef_NoLogging(); + if(bIsDynamicMethod) // if its a generic and a dynamic method, we would set the methodtoken to 0 + ulMethodToken = (ULONG)0; + } + else + ulMethodToken = (ULONG)pMethodDesc->GetMemberDef_NoLogging(); + + if(bHasNativeImage) + { + ullColdMethodStartAddress = (ULONGLONG)methodRegionInfo.coldStartAddress; + ulColdMethodSize = (ULONG)methodRegionInfo.coldSize; // methodRegionInfo.coldSize is size_t and info.MethodLoadInfo.MethodSize is 32 bit; will give incorrect values on a 64-bit machine + } + + SString tNamespace, tMethodName, tMethodSignature; + + // if verbose method load info needed, only then + // find method name and signature and fire verbose method load info + if(bShowVerboseOutput) + { + if(!namespaceOrClassName|| !methodName|| !methodSignature || (methodName->IsEmpty() && namespaceOrClassName->IsEmpty() && methodSignature->IsEmpty())) + { + pMethodDesc->GetMethodInfo(tNamespace, tMethodName, tMethodSignature); + namespaceOrClassName = &tNamespace; + methodName = &tMethodName; + methodSignature = &tMethodSignature; + } + pNamespaceName = (PWCHAR)namespaceOrClassName->GetUnicode(); + pMethodName = (PWCHAR)methodName->GetUnicode(); + pMethodSignature = (PWCHAR)methodSignature->GetUnicode(); + } + + BOOL bFireEventForColdSection = (bHasNativeImage && ullColdMethodStartAddress && ulColdMethodSize); + + /* prepare events args for ETW and ETM */ +#ifndef FEATURE_DTRACE + szDtraceOutput1 = (PCWSTR)pNamespaceName; + szDtraceOutput2 = (PCWSTR)pMethodName; + szDtraceOutput3 = (PCWSTR)pMethodSignature; +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nSizeTempNamespaceName = WideCharToMultiByte(pNamespaceName, szDtraceOutput1); + if (nSizeTempNamespaceName == 0) + return; + INT32 nSizeTempMethodName = WideCharToMultiByte(pMethodName, szDtraceOutput2); + if (nSizeTempMethodName == 0) + return; + INT32 nSizeMothodSignature = WideCharToMultiByte(pMethodSignature, szDtraceOutput3); + if (nSizeMothodSignature == 0) + return; +#endif // !FEATURE_DTRACE + + if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad)) + { + if(bShowVerboseOutput) + { + FireEtwMethodLoadVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodLoad_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + GetClrInstanceId(), + rejitID); + } + 
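+            // NGEN methods split by IBC may also have a cold region; when one exists,
+            // fire a second event below describing the cold extent (ColdSection flag set).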
if(bFireEventForColdSection) + { + if(bShowVerboseOutput) + { + FireEtwMethodLoadVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodLoad_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + GetClrInstanceId(), + rejitID); + } + } + } + else if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodUnload)) + { + if(bShowVerboseOutput) + { + FireEtwMethodUnloadVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodUnload_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + GetClrInstanceId(), + rejitID); + } + if(bFireEventForColdSection) + { + if(bShowVerboseOutput) + { + FireEtwMethodUnloadVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodUnload_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + GetClrInstanceId(), + rejitID); + } + } + } + else if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCStart)) + { + if(bShowVerboseOutput) + { + FireEtwMethodDCStartVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodDCStart_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + GetClrInstanceId(), + rejitID); + } + if(bFireEventForColdSection) + { + if(bShowVerboseOutput) + { + FireEtwMethodDCStartVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodDCStart_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + GetClrInstanceId(), + rejitID); + } + } + } + else if((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCEnd) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCEnd)) + { + if(bShowVerboseOutput) + { + FireEtwMethodDCEndVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodDCEnd_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullMethodStartAddress, + ulMethodSize, + ulMethodToken, + ulMethodFlags, + GetClrInstanceId(), + rejitID); + } + if(bFireEventForColdSection) + { + 
if(bShowVerboseOutput) + { + FireEtwMethodDCEndVerbose_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + szDtraceOutput1, + szDtraceOutput2, + szDtraceOutput3, + GetClrInstanceId(), + rejitID); + } + else + { + FireEtwMethodDCEnd_V1_or_V2(ullMethodIdentifier, + ullModuleID, + ullColdMethodStartAddress, + ulColdMethodSize, + ulMethodToken, + ulColdMethodFlags, + GetClrInstanceId(), + rejitID); + } + } + } + else + { + _ASSERTE((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodDCEnd) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodUnload) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCStart) || + (dwEventOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCEnd)); + } +} + +// This event cannot be supported yet on coreclr, since Silverlight needs to support +// XP, and this event uses a format (dynamic-sized arrays) only supported by the +// Vista+ Crimson event format. So stub out the whole function to a no-op on pre-Vista +// platforms. +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT +//--------------------------------------------------------------------------------------- +// +// Fires the IL-to-native map event for JITted methods. This is used for the runtime, +// rundown start, and rundown end events that include the il-to-native map information +// +// Arguments: +// pMethodDesc - MethodDesc for which we'll fire the map event +// dwEventOptions - Options that tells us, in the rundown case, whether we're +// supposed to fire the start or end rundown events. +// + +// static +void ETW::MethodLog::SendMethodILToNativeMapEvent(MethodDesc * pMethodDesc, DWORD dwEventOptions, ReJITID rejitID) +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + SO_NOT_MAINLINE; + } + CONTRACTL_END; + + // This is the limit on how big the il-to-native map can get, as measured by number + // of entries in each parallel array (IL offset array and native offset array). + // This number was chosen to ensure the overall event stays under the Windows limit + // of 64K + const USHORT kMapEntriesMax = 7000; + + if (pMethodDesc == NULL) + return; + + if (pMethodDesc->HasClassOrMethodInstantiation() && pMethodDesc->IsTypicalMethodDefinition()) + return; + + // g_pDebugInterface is initialized on startup on desktop CLR, regardless of whether a debugger + // or profiler is loaded. So it should always be available. + _ASSERTE(g_pDebugInterface != NULL); + + ULONGLONG ullMethodIdentifier = (ULONGLONG)pMethodDesc; + + USHORT cMap; + NewArrayHolder rguiILOffset; + NewArrayHolder rguiNativeOffset; + + HRESULT hr = g_pDebugInterface->GetILToNativeMappingIntoArrays( + pMethodDesc, + kMapEntriesMax, + &cMap, + &rguiILOffset, + &rguiNativeOffset); + if (FAILED(hr)) + return; + + // Runtime provider. 
+ // + // This macro already checks for the JittedMethodILToNativeMapKeyword before + // choosing to fire the event + if ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodILToNativeMap) != 0) + { + FireEtwMethodILToNativeMap( + ullMethodIdentifier, + rejitID, + 0, // Extent: This event is only sent for JITted (not NGENd) methods, and + // currently there is only one extent (hot) for JITted methods. + cMap, + rguiILOffset, + rguiNativeOffset, + GetClrInstanceId()); + } + + // Rundown provider + // + // These macros already check for the JittedMethodILToNativeMapRundownKeyword + // before choosing to fire the event--we further check our options to see if we + // should fire the Start and / or End flavor of the event (since the keyword alone + // is insufficient to distinguish these). + // + // (for an explanation of the parameters see the FireEtwMethodILToNativeMap call above) + if ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::MethodDCStartILToNativeMap) != 0) + FireEtwMethodDCStartILToNativeMap(ullMethodIdentifier, 0, 0, cMap, rguiILOffset, rguiNativeOffset, GetClrInstanceId()); + if ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::MethodDCEndILToNativeMap) != 0) + FireEtwMethodDCEndILToNativeMap(ullMethodIdentifier, 0, 0, cMap, rguiILOffset, rguiNativeOffset, GetClrInstanceId()); +} +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + + +void ETW::MethodLog::SendHelperEvent(ULONGLONG ullHelperStartAddress, ULONG ulHelperSize, LPCWSTR pHelperName) +{ + WRAPPER_NO_CONTRACT; + if(pHelperName) + { +#ifndef FEATURE_DTRACE + PCWSTR szDtraceOutput1=L""; +#else + CHAR szDtraceOutput1[DTRACE_OUTPUT_STRING_LEN]; +#endif // !FEATURE_DTRACE + ULONG methodFlags = ETW::MethodLog::MethodStructs::JitHelperMethod; // helper flag set +#ifndef FEATURE_DTRACE + FireEtwMethodLoadVerbose_V1(ullHelperStartAddress, + 0, + ullHelperStartAddress, + ulHelperSize, + 0, + methodFlags, + NULL, + pHelperName, + NULL, + GetClrInstanceId()); +#else // !FEATURE_DTRACE + // since DTrace do not support UNICODE string, they need to be converted to ANSI string + INT32 nTempHelperName = WideCharToMultiByte(pHelperName, szDtraceOutput1); + if (nTempHelperName == 0) + return; + // in the action, printf, of DTtrace, it cannot print an arg with value NULL when the format is set %s. + // Dtrace does not provide the condition statement so that we give a string "NULL" to it. 
+ FireEtwMethodLoadVerbose_V1(ullHelperStartAddress, + 0, + ullHelperStartAddress, + ulHelperSize, + 0, + methodFlags, + szDtraceOutputNULL, + szDtraceOutput1, + szDtraceOutputNULL, + GetClrInstanceId()); +#endif // !FEATURE_DTRACE + } +} + + +/****************************************************************************/ +/* This routine sends back method events of type 'dwEventOptions', for all + NGEN methods in pModule */ +/****************************************************************************/ +void ETW::MethodLog::SendEventsForNgenMethods(Module *pModule, DWORD dwEventOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + +#ifdef FEATURE_PREJIT + if(!pModule || !pModule->HasNativeImage()) + return; + + MethodIterator mi(pModule); + + while(mi.Next()) + { + MethodDesc *hotDesc = (MethodDesc *)mi.GetMethodDesc(); + ETW::MethodLog::SendMethodEvent(hotDesc, dwEventOptions, FALSE); + } +#endif // FEATURE_PREJIT +} + +/****************************************************************************/ +/* This routine sends back method events of type 'dwEventOptions', for all + JITed methods in either a given LoaderAllocator (if pLoaderAllocatorFilter is non NULL) + or in a given Domain (if pDomainFilter is non NULL) or for + all methods (if both filters are null) */ +/****************************************************************************/ +void ETW::MethodLog::SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAllocator *pLoaderAllocatorFilter, DWORD dwEventOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + +#if !defined(FEATURE_PAL) && !defined(DACCESS_COMPILE) + + // This is only called for JITted methods loading xor unloading + BOOL fLoadOrDCStart = ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoadOrDCStartAny) != 0); + BOOL fUnloadOrDCEnd = ((dwEventOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnloadOrDCEndAny) != 0); + _ASSERTE((fLoadOrDCStart || fUnloadOrDCEnd) && !(fLoadOrDCStart && fUnloadOrDCEnd)); + + BOOL fSendMethodEvent = + (dwEventOptions & + (ETW::EnumerationLog::EnumerationStructs::JitMethodLoad | + ETW::EnumerationLog::EnumerationStructs::JitMethodDCStart | + ETW::EnumerationLog::EnumerationStructs::JitMethodUnload | + ETW::EnumerationLog::EnumerationStructs::JitMethodDCEnd)) != 0; + + BOOL fSendILToNativeMapEvent = + (dwEventOptions & + (ETW::EnumerationLog::EnumerationStructs::MethodDCStartILToNativeMap | + ETW::EnumerationLog::EnumerationStructs::MethodDCEndILToNativeMap)) != 0; + + BOOL fCollectibleLoaderAllocatorFilter = + ((pLoaderAllocatorFilter != NULL) && (pLoaderAllocatorFilter->IsCollectible())); +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + if (fSendILToNativeMapEvent) + { + // The call to SendMethodILToNativeMapEvent assumes that the debugger's lazy + // data has already been initialized, to ensure we don't try to do the lazy init + // while under the implicit, notrigger CodeHeapIterator lock below. + + // g_pDebugInterface is initialized on startup on desktop CLR, regardless of whether a debugger + // or profiler is loaded. So it should always be available. 
+ _ASSERTE(g_pDebugInterface != NULL); + g_pDebugInterface->InitializeLazyDataIfNecessary(); + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + + EEJitManager::CodeHeapIterator heapIterator(pDomainFilter, pLoaderAllocatorFilter); + while(heapIterator.Next()) + { + MethodDesc * pMD = heapIterator.GetMethod(); + if (pMD == NULL) + continue; + + TADDR codeStart = heapIterator.GetMethodCode(); + + // Grab rejitID from the rejit manager. Short-circuit the call if we're filtering + // by a collectible loader allocator, since rejit is not supported on RefEmit + // assemblies. This also allows us to avoid having to pre-enter the rejit + // manager locks (which we have to do when filtering by domain; see + // code:#TableLockHolder). + ReJITID rejitID = + fCollectibleLoaderAllocatorFilter ? + 0 : + pMD->GetReJitManager()->GetReJitIdNoLock(pMD, codeStart); + + // There are small windows of time where the heap iterator may come across a + // codeStart that is not yet published to the MethodDesc. This may happen if + // we're JITting the method right now on another thread, and have not completed + // yet. Detect the race, and skip the method if appropriate. (If rejitID is + // nonzero, there is no race, as GetReJitIdNoLock will not return a nonzero + // rejitID if the codeStart has not yet been published for that rejitted version + // of the method.) This check also catches recompilations due to EnC, which we do + // not want to issue events for, in order to ensure xperf's assumption that + // MethodDesc* + ReJITID + extent (hot vs. cold) form a unique key for code + // ranges of methods + if ((rejitID == 0) && (codeStart != PCODEToPINSTR(pMD->GetNativeCode()))) + continue; + + // When we're called to announce loads, then the methodload event itself must + // precede any supplemental events, so that the method load or method jitting + // event is the first event the profiler sees for that MethodID (and not, say, + // the MethodILToNativeMap event.) + if (fLoadOrDCStart) + { + if (fSendMethodEvent) + { + ETW::MethodLog::SendMethodEvent( + pMD, + dwEventOptions, + TRUE, // bIsJit + NULL, // namespaceOrClassName + NULL, // methodName + NULL, // methodSignature + codeStart, + rejitID); + } + } + + // Send any supplemental events requested for this MethodID +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + if (fSendILToNativeMapEvent) + ETW::MethodLog::SendMethodILToNativeMapEvent(pMD, dwEventOptions, rejitID); +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + + // When we're called to announce unloads, then the methodunload event itself must + // come after any supplemental events, so that the method unload event is the + // last event the profiler sees for this MethodID + if (fUnloadOrDCEnd) + { + if (fSendMethodEvent) + { + ETW::MethodLog::SendMethodEvent( + pMD, + dwEventOptions, + TRUE, // bIsJit + NULL, // namespaceOrClassName + NULL, // methodName + NULL, // methodSignature + codeStart, + rejitID); + } + } + } +#endif // !FEATURE_PAL && !DACCESS_COMPILE +} + +//--------------------------------------------------------------------------------------- +// +// Wrapper around IterateDomain, which locks the AppDomain to be < +// STAGE_FINALIZED until the iteration is complete. +// +// Arguments: +// pAppDomain - AppDomain to iterate +// enumerationOptions - Flags indicating what to enumerate. 
Just passed +// straight through to IterateDomain +// +void ETW::EnumerationLog::IterateAppDomain(AppDomain * pAppDomain, DWORD enumerationOptions) +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + PRECONDITION(pAppDomain != NULL); + } + CONTRACTL_END; + + // Hold the system domain lock during the entire iteration, so we can + // ensure the App Domain does not get finalized until we're all done + SystemDomain::LockHolder lh; + + if (pAppDomain->IsFinalized()) + { + return; + } + + // Since we're not FINALIZED yet, the handle table should remain intact, + // as should all type information in this AppDomain + _ASSERTE(!pAppDomain->NoAccessToHandleTable()); + + // Now it's safe to do the iteration + IterateDomain(pAppDomain, enumerationOptions); + + // Since we're holding the system domain lock, the AD type info should be + // there throughout the entire iteration we just did + _ASSERTE(!pAppDomain->NoAccessToHandleTable()); +} + +/********************************************************************************/ +/* This routine fires ETW events for + Domain, + Assemblies in them, + DomainModule's in them, + Modules in them, + JIT methods in them, + and the NGEN methods in them + based on enumerationOptions.*/ +/********************************************************************************/ +void ETW::EnumerationLog::IterateDomain(BaseDomain *pDomain, DWORD enumerationOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + PRECONDITION(pDomain != NULL); + } CONTRACTL_END; + +#if defined(_DEBUG) && !defined(DACCESS_COMPILE) + // Do not call IterateDomain() directly with an AppDomain. Use + // IterateAppDomain(), whch wraps this function with a hold on the + // SystemDomain lock, which ensures pDomain's type data doesn't disappear + // on us. + if (pDomain->IsAppDomain()) + { + _ASSERTE(SystemDomain::IsUnderDomainLock()); + } +#endif // defined(_DEBUG) && !defined(DACCESS_COMPILE) + + EX_TRY + { + // DC Start events for Domain + if(enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + ETW::LoaderLog::SendDomainEvent(pDomain, enumerationOptions); + } + + // DC End or Unload Jit Method events + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnloadOrDCEndAny) + { + ETW::MethodLog::SendEventsForJitMethods(pDomain, NULL, enumerationOptions); + } + + if (pDomain->IsAppDomain()) + { + AppDomain::AssemblyIterator assemblyIterator = pDomain->AsAppDomain()->IterateAssembliesEx( + (AssemblyIterationFlags)(kIncludeLoaded | kIncludeExecution)); + CollectibleAssemblyHolder pDomainAssembly; + while (assemblyIterator.Next(pDomainAssembly.This())) + { + CollectibleAssemblyHolder pAssembly = pDomainAssembly->GetLoadedAssembly(); + BOOL bIsDomainNeutral = pAssembly->IsDomainNeutral(); + if (bIsDomainNeutral) + continue; + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + ETW::EnumerationLog::IterateAssembly(pAssembly, enumerationOptions); + } + + DomainModuleIterator domainModuleIterator = pDomainAssembly->IterateModules(kModIterIncludeLoaded); + while (domainModuleIterator.Next()) + { + Module * pModule = domainModuleIterator.GetModule(); + ETW::EnumerationLog::IterateModule(pModule, enumerationOptions); + } + + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload)) + { + ETW::EnumerationLog::IterateAssembly(pAssembly, enumerationOptions); + } + } + } + else + { + 
SharedDomain::SharedAssemblyIterator sharedDomainIterator; + while (sharedDomainIterator.Next()) + { + Assembly * pAssembly = sharedDomainIterator.GetAssembly(); + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + ETW::EnumerationLog::IterateAssembly(pAssembly, enumerationOptions); + } + + ModuleIterator domainModuleIterator = pAssembly->IterateModules(); + while (domainModuleIterator.Next()) + { + Module * pModule = domainModuleIterator.GetModule(); + ETW::EnumerationLog::IterateModule(pModule, enumerationOptions); + } + + if ((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload)) + { + ETW::EnumerationLog::IterateAssembly(pAssembly, enumerationOptions); + } + } + } + + // DC Start or Load Jit Method events + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoadOrDCStartAny) + { + ETW::MethodLog::SendEventsForJitMethods(pDomain, NULL, enumerationOptions); + } + + // DC End or Unload events for Domain + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload)) + { + ETW::LoaderLog::SendDomainEvent(pDomain, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + + +/********************************************************************************/ +/* This routine fires ETW events for + Assembly in LoaderAllocator, + DomainModule's in them, + Modules in them, + JIT methods in them, + and the NGEN methods in them + based on enumerationOptions.*/ +/********************************************************************************/ +void ETW::EnumerationLog::IterateCollectibleLoaderAllocator(AssemblyLoaderAllocator *pLoaderAllocator, DWORD enumerationOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + PRECONDITION(pLoaderAllocator != NULL); + } CONTRACTL_END; + + EX_TRY + { + // Unload Jit Method events + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnload) + { + ETW::MethodLog::SendEventsForJitMethods(NULL, pLoaderAllocator, enumerationOptions); + } + + Assembly *pAssembly = pLoaderAllocator->Id()->GetDomainAssembly()->GetAssembly(); + _ASSERTE(!pAssembly->IsDomainNeutral()); // Collectible Assemblies are not domain neutral. 
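+        // Fire module-level events for each module of the collectible assembly first,
+        // then the assembly-level unload event below.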
+ + DomainModuleIterator domainModuleIterator = pLoaderAllocator->Id()->GetDomainAssembly()->IterateModules(kModIterIncludeLoaded); + while (domainModuleIterator.Next()) + { + Module *pModule = domainModuleIterator.GetModule(); + ETW::EnumerationLog::IterateModule(pModule, enumerationOptions); + } + + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) + { + ETW::EnumerationLog::IterateAssembly(pAssembly, enumerationOptions); + } + + // Load Jit Method events + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoad) + { + ETW::MethodLog::SendEventsForJitMethods(NULL, pLoaderAllocator, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/********************************************************************************/ +/* This routine fires ETW events for Assembly and the DomainModule's in them + based on enumerationOptions.*/ +/********************************************************************************/ +void ETW::EnumerationLog::IterateAssembly(Assembly *pAssembly, DWORD enumerationOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + PRECONDITION(pAssembly != NULL); + } CONTRACTL_END; + + EX_TRY + { + // DC Start events for Assembly + if(enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) + { + ETW::LoaderLog::SendAssemblyEvent(pAssembly, enumerationOptions); + } + + // DC Start, DCEnd, events for DomainModule + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart)) + { + if(pAssembly->GetDomain()->IsAppDomain()) + { + DomainModuleIterator dmIterator = pAssembly->FindDomainAssembly(pAssembly->GetDomain()->AsAppDomain())->IterateModules(kModIterIncludeLoaded); + while (dmIterator.Next()) + { + ETW::LoaderLog::SendModuleEvent(dmIterator.GetModule(), enumerationOptions, TRUE); + } + } + } + + // DC End or Unload events for Assembly + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload)) + { + ETW::LoaderLog::SendAssemblyEvent(pAssembly, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +/********************************************************************************/ +/* This routine fires ETW events for Module, their range information and the NGEN methods in them + based on enumerationOptions.*/ +/********************************************************************************/ +void ETW::EnumerationLog::IterateModule(Module *pModule, DWORD enumerationOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + PRECONDITION(pModule != NULL); + } CONTRACTL_END; + + EX_TRY + { + // DC Start events for Module + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCStart) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCStart)) + { + ETW::LoaderLog::SendModuleEvent(pModule, enumerationOptions); + } + + // DC Start or Load or DC End or Unload Ngen Method events + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodLoad) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCStart) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodUnload) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::NgenMethodDCEnd)) 
+ { + ETW::MethodLog::SendEventsForNgenMethods(pModule, enumerationOptions); + } + + // DC End or Unload events for Module + if((enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleDCEnd) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::DomainAssemblyModuleUnload) || + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeDCEnd)) + { + ETW::LoaderLog::SendModuleEvent(pModule, enumerationOptions); + } + + // If we're logging types, then update the internal Type hash table to account + // for the module's unloading + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::TypeUnload) + { + ETW::TypeSystemLog::OnModuleUnload(pModule); + } + + // ModuleRangeLoadPrivate events for module range information from attach/detach scenarios + if (ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_PERFTRACK_PRIVATE_KEYWORD) && + (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::ModuleRangeLoadPrivate)) + { + ETW::LoaderLog::SendModuleEvent(pModule, enumerationOptions); + } + } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); +} + +//--------------------------------------------------------------------------------------- +// +// This routine sends back domain, assembly, module and method events based on +// enumerationOptions. +// +// Arguments: +// * moduleFilter - if non-NULL, events from only moduleFilter module are reported +// * domainFilter - if non-NULL, events from only domainFilter domain are reported +// * enumerationOptions - Flags from ETW::EnumerationLog::EnumerationStructs which +// describe which events should be sent. +// +// Notes: +// * if all filter args are NULL, events from all domains are reported +// +// #TableLockHolder: +// +// A word about ReJitManager::TableLockHolder... As we enumerate through the functions, +// we may need to grab their ReJITIDs. The ReJitManager grabs its table Crst in order to +// fetch these. However, several other kinds of locks are being taken during this +// enumeration, such as the SystemDomain lock and the EEJitManager::CodeHeapIterator's +// lock. In order to avoid lock-leveling issues, we grab the appropriate ReJitManager +// table locks up front. In particular, we need to grab the SharedDomain's ReJitManager +// table lock as well as the specific AppDomain's ReJitManager table lock for the current +// AppDomain we're iterating. Why the SharedDomain's ReJitManager lock? For any given +// AppDomain we're iterating over, the MethodDescs we find may be managed by that +// AppDomain's ReJitManger OR the SharedDomain's ReJitManager. (This is due to generics +// and whether given instantiations may be shared based on their arguments.) Therefore, +// we proactively take the SharedDomain's ReJitManager's table lock up front, and then +// individually take the appropriate AppDomain's ReJitManager's table lock that +// corresponds to the domain or module we're currently iterating over. 
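+// In EnumerationHelper below this ordering shows up directly: the SharedDomain's
+// ReJitManager table lock is taken first, and the per-module or per-AppDomain
+// ReJitManager table lock is taken inside the corresponding branch before iteration starts.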
+// + +// static +void ETW::EnumerationLog::EnumerationHelper(Module *moduleFilter, BaseDomain *domainFilter, DWORD enumerationOptions) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + } CONTRACTL_END; + + // Disable IBC logging during ETW enumeration since we call a lot of functionality + // that does logging and causes problems in the shutdown path due to critical + // section access for IBC logging + IBCLoggingDisabler disableLogging; + + // See code:#TableLockHolder + ReJitManager::TableLockHolder lkRejitMgrSharedDomain(SharedDomain::GetDomain()->GetReJitManager()); + + if(moduleFilter) + { + // See code:#TableLockHolder + ReJitManager::TableLockHolder lkRejitMgrModule(moduleFilter->GetReJitManager()); + + + // DC End or Unload Jit Method events from all Domains + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnloadOrDCEndAny) + { + ETW::MethodLog::SendEventsForJitMethods(NULL, NULL, enumerationOptions); + } + + ETW::EnumerationLog::IterateModule(moduleFilter, enumerationOptions); + + // DC Start or Load Jit Method events from all Domains + if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodLoadOrDCStartAny) + { + ETW::MethodLog::SendEventsForJitMethods(NULL, NULL, enumerationOptions); + } + } + else + { + if(domainFilter) + { + // See code:#TableLockHolder + ReJitManager::TableLockHolder lkRejitMgrAD(domainFilter->GetReJitManager()); + + if(domainFilter->IsAppDomain()) + { + ETW::EnumerationLog::IterateAppDomain(domainFilter->AsAppDomain(), enumerationOptions); + } + else + { + ETW::EnumerationLog::IterateDomain(domainFilter, enumerationOptions); + } + } + else + { + AppDomainIterator appDomainIterator(FALSE); + while(appDomainIterator.Next()) + { + AppDomain *pDomain = appDomainIterator.GetDomain(); + if (pDomain != NULL) + { + // See code:#TableLockHolder + ReJitManager::TableLockHolder lkRejitMgrAD(pDomain->GetReJitManager()); + + ETW::EnumerationLog::IterateAppDomain(pDomain, enumerationOptions); + } + } + ETW::EnumerationLog::IterateDomain(SharedDomain::GetDomain(), enumerationOptions); + } + } +} + +#endif // !FEATURE_REDHAWK +#endif // defined(FEATURE_REDHAWK) || !defined(FEATURE_PAL) || defined(FEATURE_DTRACE) diff --git a/src/coreclr/src/nativeaot/Runtime/eventtrace.h b/src/coreclr/src/nativeaot/Runtime/eventtrace.h new file mode 100644 index 0000000000000..03744b76ea4ef --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/eventtrace.h @@ -0,0 +1,343 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// +// File: eventtrace.h +// Abstract: This module implements Event Tracing support. This includes +// eventtracebase.h, and adds VM-specific ETW helpers to support features like type +// logging, allocation logging, and gc heap walk logging. +// +// #EventTracing +// Windows +// ETW (Event Tracing for Windows) is a high-performance, low overhead and highly scalable +// tracing facility provided by the Windows Operating System. ETW is available on Win2K and above. There are +// four main types of components in ETW: event providers, controllers, consumers, and event trace sessions. +// An event provider is a logical entity that writes events to ETW sessions. The event provider must register +// a provider ID with ETW through the registration API. A provider first registers with ETW and writes events +// from various points in the code by invoking the ETW logging API. 
When a provider is enabled dynamically by +// the ETW controller application, calls to the logging API sends events to a specific trace session +// designated by the controller. Each event sent by the event provider to the trace session consists of a +// fixed header that includes event metadata and additional variable user-context data. CLR is an event +// provider. + +// Mac +// DTrace is similar to ETW and has been made to look like ETW at most of the places. +// For convenience, it is called ETM (Event Tracing for Mac) and exists only on the Mac Leopard OS +// ============================================================================ + +#ifndef _VMEVENTTRACE_H_ +#define _VMEVENTTRACE_H_ + +#include "eventtracebase.h" +#include "gcinterface.h" + +#ifdef FEATURE_EVENT_TRACE +struct ProfilingScanContext : ScanContext +{ + BOOL fProfilerPinned; + void * pvEtwContext; + void *pHeapId; + + ProfilingScanContext(BOOL fProfilerPinnedParam); +}; +#endif // defined(FEATURE_EVENT_TRACE) + +namespace ETW +{ +#ifndef FEATURE_REDHAWK + + class LoggedTypesFromModule; + + // We keep a hash of these to keep track of: + // * Which types have been logged through ETW (so we can avoid logging dupe Type + // events), and + // * GCSampledObjectAllocation stats to help with "smart sampling" which + // dynamically adjusts sampling rate of objects by type. + // See code:LoggedTypesFromModuleTraits + struct TypeLoggingInfo + { + public: + TypeLoggingInfo(TypeHandle thParam) + { + Init(thParam); + } + + TypeLoggingInfo() + { + Init(TypeHandle()); + } + + void Init(TypeHandle thParam) + { + th = thParam; + dwTickOfCurrentTimeBucket = 0; + dwAllocCountInCurrentBucket = 0; + flAllocPerMSec = 0; + + dwAllocsToSkipPerSample = 0; + dwAllocsSkippedForSample = 0; + cbIgnoredSizeForSample = 0; + }; + + // The type this TypeLoggingInfo represents + TypeHandle th; + + // Smart sampling + + // These bucket values remember stats of a particular time slice that are used to + // help adjust the sampling rate + DWORD dwTickOfCurrentTimeBucket; + DWORD dwAllocCountInCurrentBucket; + float flAllocPerMSec; + + // The number of data points to ignore before taking a "sample" (i.e., logging a + // GCSampledObjectAllocation ETW event for this type) + DWORD dwAllocsToSkipPerSample; + + // The current number of data points actually ignored for the current sample + DWORD dwAllocsSkippedForSample; + + // The current count of bytes of objects of this type actually allocated (and + // ignored) for the current sample + SIZE_T cbIgnoredSizeForSample; + }; + + // Class to wrap all type system logic for ETW + class TypeSystemLog + { + private: + static AllLoggedTypes * s_pAllLoggedTypes; + + // See code:ETW::TypeSystemLog::PostRegistrationInit + static BOOL s_fHeapAllocEventEnabledOnStartup; + static BOOL s_fHeapAllocHighEventEnabledNow; + static BOOL s_fHeapAllocLowEventEnabledNow; + + // If COMPLUS_UNSUPPORTED_ETW_ObjectAllocationEventsPerTypePerSec is set, then + // this is used to determine the event frequency, overriding + // s_nDefaultMsBetweenEvents above (regardless of which + // GCSampledObjectAllocation*Keyword was used) + static int s_nCustomMsBetweenEvents; + + public: + // This customizes the type logging behavior in LogTypeAndParametersIfNecessary + enum TypeLogBehavior + { + // Take lock, and consult hash table to see if this is the first time we've + // encountered the type, in which case, log it + kTypeLogBehaviorTakeLockAndLogIfFirstTime, + + // Caller has already taken lock, so just directly consult hash table to see + // if 
this is the first time we've encountered the type, in which case, log + // it + kTypeLogBehaviorAssumeLockAndLogIfFirstTime, + + // Don't take lock, don't consult hash table. Just log the type. (This is + // used in cases when checking for dupe type logging isn't worth it, such as + // when logging the finalization of an object.) + kTypeLogBehaviorAlwaysLog, + + // When logging the type for GCSampledObjectAllocation events, we don't need + // the lock (as it's already held by the code doing the stats for smart + // sampling), and we already know we need to log the type (since we already + // looked it up in the hash). But we would still need to consult the hash + // for any type parameters, so kTypeLogBehaviorAlwaysLog isn't appropriate, + // and this is used instead. + kTypeLogBehaviorAssumeLockAndAlwaysLogTopLevelType, + }; + + static HRESULT PreRegistrationInit(); + static void PostRegistrationInit(); + static BOOL IsHeapAllocEventEnabled(); + static void SendObjectAllocatedEvent(Object * pObject); + static CrstBase * GetHashCrst(); + static void LogTypeAndParametersIfNecessary(BulkTypeEventLogger * pBulkTypeEventLogger, ULONGLONG thAsAddr, TypeLogBehavior typeLogBehavior); + static void OnModuleUnload(Module * pModule); + static void OnKeywordsChanged(); + + private: + static BOOL ShouldLogType(TypeHandle th); + static BOOL ShouldLogTypeNoLock(TypeHandle th); + static TypeLoggingInfo LookupOrCreateTypeLoggingInfo(TypeHandle th, BOOL * pfCreatedNew, LoggedTypesFromModule ** ppLoggedTypesFromModule = NULL); + static BOOL AddOrReplaceTypeLoggingInfo(ETW::LoggedTypesFromModule * pLoggedTypesFromModule, const ETW::TypeLoggingInfo * pTypeLoggingInfo); + static int GetDefaultMsBetweenEvents(); + static void OnTypesKeywordTurnedOff(); + }; + +#endif // FEATURE_REDHAWK + + // Class to wrap all GC logic for ETW + class GCLog + { + private: + // When WPA triggers a GC, it gives us this unique number to append to our + // GCStart event so WPA can correlate the CLR's GC with the JScript GC they + // triggered at the same time. + // + // We set this value when the GC is triggered, and then retrieve the value on the + // first subsequent FireGcStart() method call for a full, induced GC, assuming + // that that's the GC that WPA triggered. This is imperfect, and if we were in + // the act of beginning another full, induced GC (for some other reason), then + // we'll attach this sequence number to that GC instead of to the WPA-induced GC, + // but who cares? When parsing ETW logs later on, it's indistinguishable if both + // GCs really were induced at around the same time. +#ifdef FEATURE_REDHAWK + static volatile LONGLONG s_l64LastClientSequenceNumber; +#else // FEATURE_REDHAWK + static Volatile s_l64LastClientSequenceNumber; +#endif // FEATURE_REDHAWK + + public: + typedef union st_GCEventInfo { + typedef struct _GenerationInfo { + ULONGLONG GenerationSize; + ULONGLONG TotalPromotedSize; + } GenerationInfo; + + struct { + GenerationInfo GenInfo[4]; // the heap info on gen0, gen1, gen2 and the large object heap. 
+ ULONGLONG FinalizationPromotedSize; //not available per generation + ULONGLONG FinalizationPromotedCount; //not available per generation + ULONG PinnedObjectCount; + ULONG SinkBlockCount; + ULONG GCHandleCount; + } HeapStats; + + typedef enum _HeapType { + SMALL_OBJECT_HEAP, LARGE_OBJECT_HEAP, READ_ONLY_HEAP + } HeapType; + struct { + ULONGLONG Address; + ULONGLONG Size; + HeapType Type; + } GCCreateSegment; + + struct { + ULONGLONG Address; + } GCFreeSegment; + struct { + ULONG Count; + ULONG Depth; + } GCEnd; + + typedef enum _AllocationKind { + AllocationSmall = 0, + AllocationLarge + }AllocationKind; + struct { + ULONG Allocation; + AllocationKind Kind; + } AllocationTick; + + // These values are gotten from the gc_reason + // in gcimpl.h + typedef enum _GC_REASON { + GC_ALLOC_SOH = 0 , + GC_INDUCED = 1 , + GC_LOWMEMORY = 2, + GC_EMPTY = 3, + GC_ALLOC_LOH = 4, + GC_OOS_SOH = 5, + GC_OOS_LOH = 6, + GC_INDUCED_NOFORCE = 7 + } GC_REASON; + typedef enum _GC_TYPE { + GC_NGC = 0 , GC_BGC = 1 , GC_FGC = 2 + } GC_TYPE; + struct { + ULONG Count; + ULONG Depth; + GC_REASON Reason; + GC_TYPE Type; + } GCStart; + + struct { + ULONG Count; // how many finalizers we called. + } GCFinalizers; + + struct { + ULONG Reason; + // This is only valid when SuspendEE is called by GC (ie, Reason is either + // SUSPEND_FOR_GC or SUSPEND_FOR_GC_PREP. + ULONG GcCount; + } SuspendEE; + + struct { + ULONG HeapNum; + } GCMark; + + struct { + ULONGLONG SegmentSize; + ULONGLONG LargeObjectSegmentSize; + BOOL ServerGC; // TRUE means it's server GC; FALSE means it's workstation. + } GCSettings; + + struct { + // The generation that triggered this notification. + ULONG Count; + // 1 means the notification was due to allocation; 0 means it was due to other factors. + ULONG Alloc; + } GCFullNotify; + } ETW_GC_INFO, *PETW_GC_INFO; + +#ifdef FEATURE_EVENT_TRACE + static void GCSettingsEvent(); +#else + static void GCSettingsEvent() {}; +#endif // FEATURE_EVENT_TRACE + + static BOOL ShouldWalkHeapObjectsForEtw(); + static BOOL ShouldWalkHeapRootsForEtw(); + static BOOL ShouldTrackMovementForEtw(); + static BOOL ShouldWalkStaticsAndCOMForEtw(); + static HRESULT ForceGCForDiagnostics(); + static void ForceGC(LONGLONG l64ClientSequenceNumber); + static void FireGcStart(ETW_GC_INFO * pGcInfo); + static void RootReference( + LPVOID pvHandle, + Object * pRootedNode, + Object * pSecondaryNodeForDependentHandle, + BOOL fDependentHandle, + ProfilingScanContext * profilingScanContext, + DWORD dwGCFlags, + DWORD rootFlags); + static void ObjectReference( + ProfilerWalkHeapContext * profilerWalkHeapContext, + Object * pObjReferenceSource, + ULONGLONG typeID, + ULONGLONG cRefs, + Object ** rgObjReferenceTargets); + static void EndHeapDump(ProfilerWalkHeapContext * profilerWalkHeapContext); + static void BeginMovedReferences(size_t * pProfilingContext); + static void MovedReference(BYTE * pbMemBlockStart, BYTE * pbMemBlockEnd, ptrdiff_t cbRelocDistance, size_t profilingContext, BOOL fCompacting, BOOL fAllowProfApiNotification = TRUE); + static void EndMovedReferences(size_t profilingContext, BOOL fAllowProfApiNotification = TRUE); + static void WalkStaticsAndCOMForETW(); +#ifndef FEATURE_REDHAWK + static void SendFinalizeObjectEvent(MethodTable * pMT, Object * pObj); +#endif // FEATURE_REDHAWK + }; +}; + +#ifndef FEATURE_ETW +inline BOOL ETW::GCLog::ShouldWalkHeapObjectsForEtw() { return FALSE; } +inline BOOL ETW::GCLog::ShouldWalkHeapRootsForEtw() { return FALSE; } +inline BOOL ETW::GCLog::ShouldTrackMovementForEtw() { return FALSE; } 
+inline BOOL ETW::GCLog::ShouldWalkStaticsAndCOMForEtw() { return FALSE; } +inline void ETW::GCLog::FireGcStart(ETW_GC_INFO * pGcInfo) { } +inline void ETW::GCLog::EndHeapDump(ProfilerWalkHeapContext * profilerWalkHeapContext) { } +inline void ETW::GCLog::BeginMovedReferences(size_t * pProfilingContext) { } +inline void ETW::GCLog::MovedReference(BYTE * pbMemBlockStart, BYTE * pbMemBlockEnd, ptrdiff_t cbRelocDistance, size_t profilingContext, BOOL fCompacting) { } +inline void ETW::GCLog::EndMovedReferences(size_t profilingContext) { } +inline void ETW::GCLog::WalkStaticsAndCOMForETW() { } +inline void ETW::GCLog::RootReference( + LPVOID pvHandle, + Object * pRootedNode, + Object * pSecondaryNodeForDependentHandle, + BOOL fDependentHandle, + ProfilingScanContext * profilingScanContext, + DWORD dwGCFlags, + DWORD rootFlags) { } +#endif + +inline BOOL EventEnabledPinObjectAtGCTime() { return FALSE; } + +#endif //_VMEVENTTRACE_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/eventtracebase.h b/src/coreclr/src/nativeaot/Runtime/eventtracebase.h new file mode 100644 index 0000000000000..3a51656944c5b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/eventtracebase.h @@ -0,0 +1,1095 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// File: eventtracebase.h +// Abstract: This module implements base Event Tracing support (excluding some of the +// CLR VM-specific ETW helpers). +// +// #EventTracing +// Windows +// ETW (Event Tracing for Windows) is a high-performance, low overhead and highly scalable +// tracing facility provided by the Windows Operating System. ETW is available on Win2K and above. There are +// four main types of components in ETW: event providers, controllers, consumers, and event trace sessions. +// An event provider is a logical entity that writes events to ETW sessions. The event provider must register +// a provider ID with ETW through the registration API. A provider first registers with ETW and writes events +// from various points in the code by invoking the ETW logging API. When a provider is enabled dynamically by +// the ETW controller application, calls to the logging API sends events to a specific trace session +// designated by the controller. Each event sent by the event provider to the trace session consists of a +// fixed header that includes event metadata and additional variable user-context data. CLR is an event +// provider. + +// Mac +// DTrace is similar to ETW and has been made to look like ETW at most of the places. +// For convenience, it is called ETM (Event Tracing for Mac) and exists only on the Mac Leopard OS +// ============================================================================ + +#ifndef _ETWTRACER_HXX_ +#define _ETWTRACER_HXX_ + +struct EventStructTypeData; +void InitializeEventTracing(); + +#ifdef FEATURE_EVENT_TRACE + +// !!!!!!! NOTE !!!!!!!! +// The flags must match those in the ETW manifest exactly +// !!!!!!! NOTE !!!!!!!! 
+ +enum EtwTypeFlags +{ + kEtwTypeFlagsDelegate = 0x1, + kEtwTypeFlagsFinalizable = 0x2, + kEtwTypeFlagsExternallyImplementedCOMObject = 0x4, + kEtwTypeFlagsArray = 0x8, + kEtwTypeFlagsModuleBaseAddress = 0x10, +}; + +enum EtwThreadFlags +{ + kEtwThreadFlagGCSpecial = 0x00000001, + kEtwThreadFlagFinalizer = 0x00000002, + kEtwThreadFlagThreadPoolWorker = 0x00000004, +}; + + +// During a heap walk, this is the storage for keeping track of all the nodes and edges +// being batched up by ETW, and for remembering whether we're also supposed to call into +// a profapi profiler. This is allocated toward the end of a GC and passed to us by the +// GC heap walker. +struct ProfilerWalkHeapContext +{ +public: + ProfilerWalkHeapContext(BOOL fProfilerPinnedParam, LPVOID pvEtwContextParam) + { + fProfilerPinned = fProfilerPinnedParam; + pvEtwContext = pvEtwContextParam; + } + + BOOL fProfilerPinned; + LPVOID pvEtwContext; +}; + +class Object; + +/******************************/ +/* CLR ETW supported versions */ +/******************************/ +#define ETW_SUPPORTED_MAJORVER 5 // ETW is supported on win2k and above +#define ETW_ENABLED_MAJORVER 6 // OS versions >= to this we enable ETW registration by default, since on XP and Windows 2003, registration is too slow. + +/***************************************/ +/* Tracing levels supported by CLR ETW */ +/***************************************/ +#define ETWMAX_TRACE_LEVEL 6 // Maximum Number of Trace Levels supported +#define TRACE_LEVEL_NONE 0 // Tracing is not on +#define TRACE_LEVEL_FATAL 1 // Abnormal exit or termination +#define TRACE_LEVEL_ERROR 2 // Severe errors that need logging +#define TRACE_LEVEL_WARNING 3 // Warnings such as allocation failure +#define TRACE_LEVEL_INFORMATION 4 // Includes non-error cases such as Entry-Exit +#define TRACE_LEVEL_VERBOSE 5 // Detailed traces from intermediate steps + +struct ProfilingScanContext; + +// +// Use this macro to check if ETW is initialized and the event is enabled +// +#define ETW_TRACING_ENABLED(Context, EventDescriptor) \ + (Context.IsEnabled && ETW_TRACING_INITIALIZED(Context.RegistrationHandle) && ETW_EVENT_ENABLED(Context, EventDescriptor)) + +// +// Using KEYWORDZERO means when checking the events category ignore the keyword +// +#define KEYWORDZERO 0x0 + +// +// Use this macro to check if ETW is initialized and the category is enabled +// +#define ETW_TRACING_CATEGORY_ENABLED(Context, Level, Keyword) \ + (ETW_TRACING_INITIALIZED(Context.RegistrationHandle) && ETW_CATEGORY_ENABLED(Context, Level, Keyword)) + +#ifdef FEATURE_DTRACE + #define ETWOnStartup(StartEventName, EndEventName) \ + ETWTraceStartup trace(StartEventName, EndEventName); + #define ETWFireEvent(EventName) \ + FireEtw##EventName(GetClrInstanceId()); +#else + #define ETWOnStartup(StartEventName, EndEventName) \ + ETWTraceStartup trace##StartEventName##(Microsoft_Windows_DotNETRuntimePrivateHandle, &StartEventName, &StartupId, &EndEventName, &StartupId); + #define ETWFireEvent(EventName) \ + ETWTraceStartup::StartupTraceEvent(Microsoft_Windows_DotNETRuntimePrivateHandle, &EventName, &StartupId); +#endif // FEATURE_DTRACE + +#ifndef FEATURE_REDHAWK + +// Headers +#ifndef FEATURE_PAL +#include +#include +#include +#include +#if !defined(DONOT_DEFINE_ETW_CALLBACK) && !defined(DACCESS_COMPILE) +#define GetVersionEx(Version) (GetOSVersion((LPOSVERSIONINFOW)Version)) +#else +#define GetVersionEx(Version) (WszGetVersionEx((LPOSVERSIONINFOW)Version)) +#endif // !DONOT_DEFINE_ETW_CALLBACK && !DACCESS_COMPILE +#endif // !FEATURE_PAL + 
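Illustrative aside, not part of the patch itself: the TRACE_LEVEL_* values and the ETW_TRACING_CATEGORY_ENABLED / ETW_EVENT_ENABLED macros above all bottom out in the same cheap level-and-keyword test that the McGen helpers later in this header implement. Below is a minimal, self-contained C++ sketch of the level half of that test; the numeric levels are quoted from the defines above, and nothing here is runtime code.

// Standalone sketch: an event is level-enabled when its level is at or below the
// session's enabled level, and a level of 0 on either side means "all levels".
#include <cassert>

static bool LevelPermitsEvent(unsigned char eventLevel, unsigned char sessionLevel)
{
    // Same shape as the level check in McGenEventTracingEnabled / McGenEventProviderEnabled.
    return (eventLevel <= sessionLevel) || (sessionLevel == 0);
}

int main()
{
    assert(LevelPermitsEvent(3 /* TRACE_LEVEL_WARNING */, 4 /* session at INFORMATION */));
    assert(!LevelPermitsEvent(5 /* TRACE_LEVEL_VERBOSE */, 2 /* session at ERROR */));
    assert(LevelPermitsEvent(5 /* TRACE_LEVEL_VERBOSE */, 0 /* session level 0 enables everything */));
    assert(LevelPermitsEvent(0 /* event level 0 */, 1));  // level-0 events pass at any session level
    return 0;
}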
+#if FEATURE_DTRACE +#include "clrdtrace.h" +#endif + +#endif //!FEATURE_REDHAWK + + +#else // FEATURE_EVENT_TRACE + +#include "etmdummy.h" +#endif // FEATURE_EVENT_TRACE + +#ifndef FEATURE_REDHAWK + +#if defined(FEATURE_CORECLR) && !defined(FEATURE_CORESYSTEM) +// For Silverlight non-CoreSys builds we still use an older toolset, +// headers/libs, and a different value for WINVER. We use this symbol +// to distinguish between whether we built the ETW header files from +// the ETW manifest using the -mof command line or not. +#define WINXP_AND_WIN2K3_BUILD_SUPPORT +#endif +#include "corprof.h" + +// g_nClrInstanceId is defined in Utilcode\Util.cpp. The definition goes into Utilcode.lib. +// This enables both the VM and Utilcode to raise ETW events. +extern UINT32 g_nClrInstanceId; +extern BOOL g_fEEManagedEXEStartup; +extern BOOL g_fEEIJWStartup; + +#define GetClrInstanceId() (static_cast(g_nClrInstanceId)) + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) +// Callback and stack support +#if !defined(DONOT_DEFINE_ETW_CALLBACK) && !defined(DACCESS_COMPILE) +extern "C" { + /* ETW control callback + * Desc: This function handles the ETW control + * callback. + * Ret: success or failure + ***********************************************/ + void EtwCallback( + _In_ LPCGUID SourceId, + _In_ ULONG ControlCode, + _In_ UCHAR Level, + _In_ ULONGLONG MatchAnyKeyword, + _In_ ULONGLONG MatchAllKeyword, + _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData, + _Inout_opt_ PVOID CallbackContext); +} + +// +// User defined callback +// +#define MCGEN_PRIVATE_ENABLE_CALLBACK(RequestCode, Context, InOutBufferSize, Buffer) \ + EtwCallback(NULL /* SourceId */, (RequestCode==WMI_ENABLE_EVENTS) ? EVENT_CONTROL_CODE_ENABLE_PROVIDER : EVENT_CONTROL_CODE_DISABLE_PROVIDER, 0 /* Level */, 0 /* MatchAnyKeyword */, 0 /* MatchAllKeyword */, NULL /* FilterData */, Context) + +// +// User defined callback2 +// +#define MCGEN_PRIVATE_ENABLE_CALLBACK_V2(SourceId, ControlCode, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext) \ + EtwCallback(SourceId, ControlCode, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext) + +extern "C" { + /* ETW callout + * Desc: This function handles the ETW callout + * Ret: success or failure + ***********************************************/ + void EtwCallout( + REGHANDLE RegHandle, + PCEVENT_DESCRIPTOR Descriptor, + ULONG ArgumentCount, + PEVENT_DATA_DESCRIPTOR EventData); +} + +// +// Call user defined callout +// +#define MCGEN_CALLOUT(RegHandle, Descriptor, NumberOfArguments, EventData) \ + EtwCallout(RegHandle, Descriptor, NumberOfArguments, EventData) +#endif //!DONOT_DEFINE_ETW_CALLBACK && !DACCESS_COMPILE + +#include +// The bulk type event is too complex for MC.exe to auto-generate proper code. +// Use code:BulkTypeEventLogger instead. +#ifdef FireEtwBulkType +#undef FireEtwBulkType +#endif // FireEtwBulkType +#endif // FEATURE_EVENT_TRACE && !FEATURE_PAL + +/**************************/ +/* CLR ETW infrastructure */ +/**************************/ +// #CEtwTracer +// On Windows Vista, ETW has gone through a major upgrade, and one of the most significant changes is the +// introduction of the unified event provider model and APIs. The older architecture used the classic ETW +// events. The new ETW architecture uses the manifest based events. To support both types of events at the +// same time, we use the manpp tool for generating event macros that can be directly used to fire ETW events +// from various components within the CLR. 
+// (http://diagnostics/sites/etw/Lists/Announcements/DispForm.aspx?ID=10&Source=http%3A%2F%2Fdiagnostics%2Fsites%2Fetw%2Fdefault%2Easpx) +// Every ETW provider has to Register itself to the system, so that when enabled, it is capable of firing +// ETW events. file:../VM/eventtrace.cpp#Registration is where the actual Provider Registration takes place. +// At process shutdown, a registered provider need to be unregistered. +// file:../VM/eventtrace.cpp#Unregistration. Since ETW can also be enabled at any instant after the process +// has started, one may want to do something useful when that happens (e.g enumerate all the loaded modules +// in the system). To enable this, we have to implement a callback routine. +// file:../VM/eventtrace.cpp#EtwCallback is CLR's implementation of the callback. +// + +#include "daccess.h" +class Module; +class Assembly; +class MethodDesc; +class MethodTable; +class BaseDomain; +class AppDomain; +class SString; +class CrawlFrame; +class LoaderAllocator; +class AssemblyLoaderAllocator; +struct AllLoggedTypes; +class CrstBase; +class BulkTypeEventLogger; +class TypeHandle; +class Thread; + + +// All ETW helpers must be a part of this namespace +// We have auto-generated macros to directly fire the events +// but in some cases, gathering the event payload information involves some work +// and it can be done in a relevant helper class like the one's in this namespace +namespace ETW +{ + // Class to wrap the ETW infrastructure logic + class CEtwTracer + { +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) + ULONG RegGuids(LPCGUID ProviderId, PENABLECALLBACK EnableCallback, PVOID CallbackContext, PREGHANDLE RegHandle); +#endif // !FEATURE_PAL + + public: +#ifdef FEATURE_EVENT_TRACE + // Registers all the Event Tracing providers + HRESULT Register(); + + // Unregisters all the Event Tracing providers + HRESULT UnRegister(); +#else + HRESULT Register() + { + return S_OK; + } + HRESULT UnRegister() + { + return S_OK; + } +#endif // FEATURE_EVENT_TRACE + }; + + class LoaderLog; + class MethodLog; + // Class to wrap all the enumeration logic for ETW + class EnumerationLog + { + friend class ETW::LoaderLog; + friend class ETW::MethodLog; +#ifdef FEATURE_EVENT_TRACE + static void SendThreadRundownEvent(); + static void IterateDomain(BaseDomain *pDomain, DWORD enumerationOptions); + static void IterateAppDomain(AppDomain * pAppDomain, DWORD enumerationOptions); + static void IterateCollectibleLoaderAllocator(AssemblyLoaderAllocator *pLoaderAllocator, DWORD enumerationOptions); + static void IterateAssembly(Assembly *pAssembly, DWORD enumerationOptions); + static void IterateModule(Module *pModule, DWORD enumerationOptions); + static void EnumerationHelper(Module *moduleFilter, BaseDomain *domainFilter, DWORD enumerationOptions); + static DWORD GetEnumerationOptionsFromRuntimeKeywords(); + public: + typedef union _EnumerationStructs + { + typedef enum _EnumerationOptions + { + None= 0x00000000, + DomainAssemblyModuleLoad= 0x00000001, + DomainAssemblyModuleUnload= 0x00000002, + DomainAssemblyModuleDCStart= 0x00000004, + DomainAssemblyModuleDCEnd= 0x00000008, + JitMethodLoad= 0x00000010, + JitMethodUnload= 0x00000020, + JitMethodDCStart= 0x00000040, + JitMethodDCEnd= 0x00000080, + NgenMethodLoad= 0x00000100, + NgenMethodUnload= 0x00000200, + NgenMethodDCStart= 0x00000400, + NgenMethodDCEnd= 0x00000800, + ModuleRangeLoad= 0x00001000, + ModuleRangeDCStart= 0x00002000, + ModuleRangeDCEnd= 0x00004000, + ModuleRangeLoadPrivate= 0x00008000, + MethodDCStartILToNativeMap= 
0x00010000, + MethodDCEndILToNativeMap= 0x00020000, + JitMethodILToNativeMap= 0x00040000, + TypeUnload= 0x00080000, + + // Helpers + ModuleRangeEnabledAny = ModuleRangeLoad | ModuleRangeDCStart | ModuleRangeDCEnd | ModuleRangeLoadPrivate, + JitMethodLoadOrDCStartAny = JitMethodLoad | JitMethodDCStart | MethodDCStartILToNativeMap, + JitMethodUnloadOrDCEndAny = JitMethodUnload | JitMethodDCEnd | MethodDCEndILToNativeMap, + }EnumerationOptions; + }EnumerationStructs; + + static void ProcessShutdown(); + static void ModuleRangeRundown(); + static void StartRundown(); + static void EndRundown(); + static void EnumerateForCaptureState(); +#else + public: + static void ProcessShutdown() {}; + static void StartRundown() {}; + static void EndRundown() {}; +#endif // FEATURE_EVENT_TRACE + }; + + + // Class to wrap all the sampling logic for ETW + class SamplingLog + { + // StackWalk available only when !FEATURE_PAL +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) + public: + typedef enum _EtwStackWalkStatus + { + Completed = 0, + UnInitialized = 1, + InProgress = 2 + } EtwStackWalkStatus; + private: + static const UINT8 s_MaxStackSize=100; + UINT32 m_FrameCount; + SIZE_T m_EBPStack[SamplingLog::s_MaxStackSize]; + void Append(SIZE_T currentFrame); + EtwStackWalkStatus SaveCurrentStack(int skipTopNFrames=1); + public: + static ULONG SendStackTrace(MCGEN_TRACE_CONTEXT TraceContext, PCEVENT_DESCRIPTOR Descriptor, LPCGUID EventGuid); + EtwStackWalkStatus GetCurrentThreadsCallStack(UINT32 *frameCount, PVOID **Stack); +#endif // FEATURE_EVENT_TRACE && !FEATURE_PAL + }; + + // Class to wrap all Loader logic for ETW + class LoaderLog + { + friend class ETW::EnumerationLog; +#ifdef FEATURE_EVENT_TRACE + static void SendModuleEvent(Module *pModule, DWORD dwEventOptions, BOOL bFireDomainModuleEvents=FALSE); +#if !defined(FEATURE_PAL) + static ULONG SendModuleRange(Module *pModule, DWORD dwEventOptions); +#endif // !FEATURE_PAL + static void SendAssemblyEvent(Assembly *pAssembly, DWORD dwEventOptions); + static void SendDomainEvent(BaseDomain *pBaseDomain, DWORD dwEventOptions, LPCWSTR wszFriendlyName=NULL); + public: + typedef union _LoaderStructs + { + typedef enum _AppDomainFlags + { + DefaultDomain=0x1, + ExecutableDomain=0x2, + SharedDomain=0x4 + }AppDomainFlags; + + typedef enum _AssemblyFlags + { + DomainNeutralAssembly=0x1, + DynamicAssembly=0x2, + NativeAssembly=0x4, + CollectibleAssembly=0x8, + }AssemblyFlags; + + typedef enum _ModuleFlags + { + DomainNeutralModule=0x1, + NativeModule=0x2, + DynamicModule=0x4, + ManifestModule=0x8, + IbcOptimized=0x10 + }ModuleFlags; + + typedef enum _RangeFlags + { + HotRange=0x0 + }RangeFlags; + + }LoaderStructs; + + static void DomainLoadReal(BaseDomain *pDomain, __in_opt LPWSTR wszFriendlyName=NULL); + + static void DomainLoad(BaseDomain *pDomain, __in_opt LPWSTR wszFriendlyName = NULL) + { +#ifndef FEATURE_PAL + if (MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context.IsEnabled) +#endif + { + DomainLoadReal(pDomain, wszFriendlyName); + } + } + + static void DomainUnload(AppDomain *pDomain); + static void CollectibleLoaderAllocatorUnload(AssemblyLoaderAllocator *pLoaderAllocator); + static void ModuleLoad(Module *pModule, LONG liReportedSharedModule); +#else + public: + static void DomainLoad(BaseDomain *pDomain, __in_opt LPWSTR wszFriendlyName=NULL) {}; + static void DomainUnload(AppDomain *pDomain) {}; + static void CollectibleLoaderAllocatorUnload(AssemblyLoaderAllocator *pLoaderAllocator) {}; + static void ModuleLoad(Module *pModule, LONG 
liReportedSharedModule) {}; +#endif // FEATURE_EVENT_TRACE + }; + + // Class to wrap all Method logic for ETW + class MethodLog + { + friend class ETW::EnumerationLog; +#ifdef FEATURE_EVENT_TRACE + static void SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAllocator *pLoaderAllocatorFilter, DWORD dwEventOptions); + static void SendEventsForNgenMethods(Module *pModule, DWORD dwEventOptions); + static void SendMethodJitStartEvent(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL); +#ifndef WINXP_AND_WIN2K3_BUILD_SUPPORT + static void SendMethodILToNativeMapEvent(MethodDesc * pMethodDesc, DWORD dwEventOptions, ReJITID rejitID); +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + static void SendMethodEvent(MethodDesc *pMethodDesc, DWORD dwEventOptions, BOOL bIsJit, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL, SIZE_T pCode = 0, ReJITID rejitID = 0); + static void SendHelperEvent(ULONGLONG ullHelperStartAddress, ULONG ulHelperSize, LPCWSTR pHelperName); + public: + typedef union _MethodStructs + { + typedef enum _MethodFlags + { + DynamicMethod=0x1, + GenericMethod=0x2, + SharedGenericCode=0x4, + JittedMethod=0x8, + JitHelperMethod=0x10 + }MethodFlags; + + typedef enum _MethodExtent + { + HotSection=0x00000000, + ColdSection=0x10000000 + }MethodExtent; + + }MethodStructs; + + static void MethodJitting(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL); + static void MethodJitted(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL, SIZE_T pCode = 0, ReJITID rejitID = 0); + static void StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pHelperName); + static void StubsInitialized(PVOID *pHelperStartAddresss, PVOID *pHelperNames, LONG ulNoOfHelpers); + static void MethodRestored(MethodDesc * pMethodDesc); + static void MethodTableRestored(MethodTable * pMethodTable); + static void DynamicMethodDestroyed(MethodDesc *pMethodDesc); +#else // FEATURE_EVENT_TRACE + public: + static void MethodJitting(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL) {}; + static void MethodJitted(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL, SIZE_T pCode = 0, ReJITID rejitID = 0) {}; + static void StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pHelperName) {}; + static void StubsInitialized(PVOID *pHelperStartAddresss, PVOID *pHelperNames, LONG ulNoOfHelpers) {}; + static void MethodRestored(MethodDesc * pMethodDesc) {}; + static void MethodTableRestored(MethodTable * pMethodTable) {}; + static void DynamicMethodDestroyed(MethodDesc *pMethodDesc) {}; +#endif // FEATURE_EVENT_TRACE + }; + + // Class to wrap all Security logic for ETW + class SecurityLog + { +#ifdef FEATURE_EVENT_TRACE + public: + static void StrongNameVerificationStart(DWORD dwInFlags, __in LPWSTR strFullyQualifiedAssemblyName); + static void StrongNameVerificationStop(DWORD dwInFlags,ULONG result, __in LPWSTR strFullyQualifiedAssemblyName); + + static void FireFieldTransparencyComputationStart(LPCWSTR wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain); + static void FireFieldTransparencyComputationEnd(LPCWSTR wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe); + + static void 
FireMethodTransparencyComputationStart(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain); + static void FireMethodTransparencyComputationEnd(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe); + + static void FireModuleTransparencyComputationStart(LPCWSTR wszModuleName, DWORD dwAppDomain); + static void FireModuleTransparencyComputationEnd(LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsTreatAsSafe, + BOOL fIsOpportunisticallyCritical, + DWORD dwSecurityRuleSet); + + static void FireTokenTransparencyComputationStart(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain); + static void FireTokenTransparencyComputationEnd(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe); + + static void FireTypeTransparencyComputationStart(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain); + static void FireTypeTransparencyComputationEnd(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsCritical, + BOOL fIsTreatAsSafe); +#else + public: + static void StrongNameVerificationStart(DWORD dwInFlags,LPWSTR strFullyQualifiedAssemblyName) {}; + static void StrongNameVerificationStop(DWORD dwInFlags,ULONG result, LPWSTR strFullyQualifiedAssemblyName) {}; + + static void FireFieldTransparencyComputationStart(LPCWSTR wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) {}; + static void FireFieldTransparencyComputationEnd(LPCWSTR wszFieldName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) {}; + + static void FireMethodTransparencyComputationStart(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) {}; + static void FireMethodTransparencyComputationEnd(LPCWSTR wszMethodName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) {}; + + static void FireModuleTransparencyComputationStart(LPCWSTR wszModuleName, DWORD dwAppDomain) {}; + static void FireModuleTransparencyComputationEnd(LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsTreatAsSafe, + BOOL fIsOpportunisticallyCritical, + DWORD dwSecurityRuleSet) {}; + + static void FireTokenTransparencyComputationStart(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain) {}; + static void FireTokenTransparencyComputationEnd(DWORD dwToken, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) {}; + + static void FireTypeTransparencyComputationStart(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain) {}; + static void FireTypeTransparencyComputationEnd(LPCWSTR wszTypeName, + LPCWSTR wszModuleName, + DWORD dwAppDomain, + BOOL fIsAllCritical, + BOOL fIsAllTransparent, + BOOL fIsCritical, + BOOL fIsTreatAsSafe) {}; +#endif // FEATURE_EVENT_TRACE + }; + + // Class to wrap all Binder logic for ETW + class BinderLog + { + public: + typedef union _BinderStructs { + typedef enum _NGENBINDREJECT_REASON { + NGEN_BIND_START_BIND = 0, + NGEN_BIND_NO_INDEX = 1, + NGEN_BIND_SYSTEM_ASSEMBLY_NOT_AVAILABLE = 2, + NGEN_BIND_NO_NATIVE_IMAGE = 3, + NGEN_BIND_REJECT_CONFIG_MASK = 4, + NGEN_BIND_FAIL = 5, + NGEN_BIND_INDEX_CORRUPTION = 6, + NGEN_BIND_REJECT_TIMESTAMP = 7, + NGEN_BIND_REJECT_NATIVEIMAGE_NOT_FOUND = 8, + NGEN_BIND_REJECT_IL_SIG = 9, + NGEN_BIND_REJECT_LOADER_EVAL_FAIL = 10, + 
NGEN_BIND_MISSING_FOUND = 11, + NGEN_BIND_REJECT_HOSTASM = 12, + NGEN_BIND_REJECT_IL_NOT_FOUND = 13, + NGEN_BIND_REJECT_APPBASE_NOT_FILE = 14, + NGEN_BIND_BIND_DEPEND_REJECT_REF_DEF_MISMATCH = 15, + NGEN_BIND_BIND_DEPEND_REJECT_NGEN_SIG = 16, + NGEN_BIND_APPLY_EXTERNAL_RELOCS_FAILED = 17, + NGEN_BIND_SYSTEM_ASSEMBLY_NATIVEIMAGE_NOT_AVAILABLE = 18, + NGEN_BIND_ASSEMBLY_HAS_DIFFERENT_GRANT = 19, + NGEN_BIND_ASSEMBLY_NOT_DOMAIN_NEUTRAL = 20, + NGEN_BIND_NATIVEIMAGE_VERSION_MISMATCH = 21, + NGEN_BIND_LOADFROM_NOT_ALLOWED = 22, + NGEN_BIND_DEPENDENCY_HAS_DIFFERENT_IDENTITY = 23 + } NGENBINDREJECT_REASON; + } BinderStructs; + }; + + // Class to wrap all Exception logic for ETW + class ExceptionLog + { + public: +#ifdef FEATURE_EVENT_TRACE + static void ExceptionThrown(CrawlFrame *pCf, BOOL bIsReThrownException, BOOL bIsNewException); +#else + static void ExceptionThrown(CrawlFrame *pCf, BOOL bIsReThrownException, BOOL bIsNewException) {}; +#endif // FEATURE_EVENT_TRACE + typedef union _ExceptionStructs + { + typedef enum _ExceptionThrownFlags + { + HasInnerException=0x1, + IsNestedException=0x2, + IsReThrownException=0x4, + IsCSE=0x8, + IsCLSCompliant=0x10 + }ExceptionThrownFlags; + }ExceptionStructs; + }; + // Class to wrap all Contention logic for ETW + class ContentionLog + { + public: + typedef union _ContentionStructs + { + typedef enum _ContentionFlags { + ManagedContention=0, + NativeContention=1 + } ContentionFlags; + } ContentionStructs; + }; + // Class to wrap all Interop logic for ETW + class InteropLog + { + public: + }; + + // Class to wrap all Information logic for ETW + class InfoLog + { + public: + typedef union _InfoStructs + { + typedef enum _StartupMode + { + ManagedExe=0x1, + HostedCLR=0x2, + IJW=0x4, + COMActivated=0x8, + Other=0x10 + }StartupMode; + + typedef enum _Sku + { + DesktopCLR=0x1, + CoreCLR=0x2 + }Sku; + + typedef enum _EtwMode + { + Normal=0x0, + Callback=0x1 + }EtwMode; + }InfoStructs; + +#ifdef FEATURE_EVENT_TRACE + static void RuntimeInformation(INT32 type); +#else + static void RuntimeInformation(INT32 type) {}; +#endif // FEATURE_EVENT_TRACE + }; +}; + + +// +// The ONE and only ONE global instantiation of this class +// +extern ETW::CEtwTracer * g_pEtwTracer; +#define ETW_IS_TRACE_ON(level) ( FALSE ) // for fusion which is eventually going to get removed +#define ETW_IS_FLAG_ON(flag) ( FALSE ) // for fusion which is eventually going to get removed + +// Commonly used constats for ETW Assembly Loader and Assembly Binder events. +#define ETWLoadContextNotAvailable (LOADCTX_TYPE_HOSTED + 1) +#define ETWAppDomainIdNotAvailable 0 // Valid AppDomain IDs start from 1 + +#define ETWFieldUnused 0 // Indicates that a particular field in the ETW event payload template is currently unused. + +#define ETWLoaderLoadTypeNotAvailable 0 // Static or Dynamic Load is only valid at LoaderPhaseStart and LoaderPhaseEnd events - for other events, 0 indicates "not available" +#define ETWLoaderStaticLoad 0 // Static reference load +#define ETWLoaderDynamicLoad 1 // Dynamic assembly load + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) && !defined(WINXP_AND_WIN2K3_BUILD_SUPPORT) +// "mc.exe -MOF" already generates this block for XP-suported builds inside ClrEtwAll.h; +// on Vista+ builds, mc is run without -MOF, and we still have code that depends on it, so +// we manually place it here. 
+FORCEINLINE +BOOLEAN __stdcall +McGenEventTracingEnabled( + __in PMCGEN_TRACE_CONTEXT EnableInfo, + __in PCEVENT_DESCRIPTOR EventDescriptor + ) +{ + + if(!EnableInfo){ + return FALSE; + } + + + // + // Check if the event Level is lower than the level at which + // the channel is enabled. + // If the event Level is 0 or the channel is enabled at level 0, + // all levels are enabled. + // + + if ((EventDescriptor->Level <= EnableInfo->Level) || // This also covers the case of Level == 0. + (EnableInfo->Level == 0)) { + + // + // Check if Keyword is enabled + // + + if ((EventDescriptor->Keyword == (ULONGLONG)0) || + ((EventDescriptor->Keyword & EnableInfo->MatchAnyKeyword) && + ((EventDescriptor->Keyword & EnableInfo->MatchAllKeyword) == EnableInfo->MatchAllKeyword))) { + return TRUE; + } + } + + return FALSE; +} +#endif // defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) && !defined(WINXP_AND_WIN2K3_BUILD_SUPPORT) + + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) +ETW_INLINE +ULONG +ETW::SamplingLog::SendStackTrace( + MCGEN_TRACE_CONTEXT TraceContext, + PCEVENT_DESCRIPTOR Descriptor, + LPCGUID EventGuid) +{ +#define ARGUMENT_COUNT_CLRStackWalk 5 + ULONG Result = ERROR_SUCCESS; +typedef struct _MCGEN_TRACE_BUFFER { + EVENT_TRACE_HEADER Header; + EVENT_DATA_DESCRIPTOR EventData[ARGUMENT_COUNT_CLRStackWalk]; +} MCGEN_TRACE_BUFFER; + + REGHANDLE RegHandle = TraceContext.RegistrationHandle; + if(!TraceContext.IsEnabled || !McGenEventTracingEnabled(&TraceContext, Descriptor)) + { + return Result; + } + + PVOID *Stack = NULL; + UINT32 FrameCount = 0; + ETW::SamplingLog stackObj; + if(stackObj.GetCurrentThreadsCallStack(&FrameCount, &Stack) == ETW::SamplingLog::Completed) + { + UCHAR Reserved1=0, Reserved2=0; + UINT16 ClrInstanceId = GetClrInstanceId(); + MCGEN_TRACE_BUFFER TraceBuf; + PEVENT_DATA_DESCRIPTOR EventData = TraceBuf.EventData; + + EventDataDescCreate(&EventData[0], &ClrInstanceId, sizeof(const UINT16) ); + + EventDataDescCreate(&EventData[1], &Reserved1, sizeof(const UCHAR) ); + + EventDataDescCreate(&EventData[2], &Reserved2, sizeof(const UCHAR) ); + + EventDataDescCreate(&EventData[3], &FrameCount, sizeof(const unsigned int) ); + + EventDataDescCreate(&EventData[4], Stack, sizeof(PVOID) * FrameCount ); + +#ifdef WINXP_AND_WIN2K3_BUILD_SUPPORT + if (!McGenPreVista) + { + return PfnEventWrite(RegHandle, Descriptor, ARGUMENT_COUNT_CLRStackWalk, EventData); + } + else + { + const MCGEN_TRACE_CONTEXT* Context = (const MCGEN_TRACE_CONTEXT*)(ULONG_PTR)RegHandle; + // + // Fill in header fields + // + + TraceBuf.Header.GuidPtr = (ULONGLONG)EventGuid; + TraceBuf.Header.Flags = WNODE_FLAG_TRACED_GUID |WNODE_FLAG_USE_GUID_PTR|WNODE_FLAG_USE_MOF_PTR; + TraceBuf.Header.Class.Version = (SHORT)Descriptor->Version; + TraceBuf.Header.Class.Level = Descriptor->Level; + TraceBuf.Header.Class.Type = Descriptor->Opcode; + TraceBuf.Header.Size = sizeof(MCGEN_TRACE_BUFFER); + + return TraceEvent(Context->Logger, &TraceBuf.Header); + } +#else // !WINXP_AND_WIN2K3_BUILD_SUPPORT + return EventWrite(RegHandle, Descriptor, ARGUMENT_COUNT_CLRStackWalk, EventData); +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + } + return Result; +}; + +#endif // FEATURE_EVENT_TRACE && !FEATURE_PAL + +#ifdef FEATURE_EVENT_TRACE +#ifdef TARGET_X86 +struct CallStackFrame +{ + struct CallStackFrame* m_Next; + SIZE_T m_ReturnAddress; +}; +#endif // TARGET_X86 +#endif // FEATURE_EVENT_TRACE + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) +FORCEINLINE +BOOLEAN __stdcall +McGenEventProviderEnabled( + __in 
PMCGEN_TRACE_CONTEXT Context, + __in UCHAR Level, + __in ULONGLONG Keyword + ) +{ + if(!Context) { + return FALSE; + } + +#ifdef WINXP_AND_WIN2K3_BUILD_SUPPORT + if(McGenPreVista){ + return ( ((Level <= Context->Level) || (Context->Level == 0)) && + (((ULONG)(Keyword & 0xFFFFFFFF) == 0) || ((ULONG)(Keyword & 0xFFFFFFFF) & Context->Flags))); + } +#endif // WINXP_AND_WIN2K3_BUILD_SUPPORT + + // + // Check if the event Level is lower than the level at which + // the channel is enabled. + // If the event Level is 0 or the channel is enabled at level 0, + // all levels are enabled. + // + + if ((Level <= Context->Level) || // This also covers the case of Level == 0. + (Context->Level == 0)) { + + // + // Check if Keyword is enabled + // + + if ((Keyword == (ULONGLONG)0) || + ((Keyword & Context->MatchAnyKeyword) && + ((Keyword & Context->MatchAllKeyword) == Context->MatchAllKeyword))) { + return TRUE; + } + } + return FALSE; +} +#endif // FEATURE_EVENT_TRACE && !FEATURE_PAL + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) + +// This macro only checks if a provider is enabled +// It does not check the flags and keywords for which it is enabled +#define ETW_PROVIDER_ENABLED(ProviderSymbol) \ + ProviderSymbol##_Context.IsEnabled + +#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrInstanceId)\ + MCGEN_ENABLE_CHECK(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, GCPerHeapHistory) ?\ + Etw_GCDataPerHeapSpecial(&GCPerHeapHistory, &GarbageCollectionPrivateId, DataPerHeap, DataSize, ClrInstanceId)\ + : ERROR_SUCCESS\ + +// The GC uses this macro around its heap walk so the TypeSystemLog's crst can be held +// for the duration of the walk (if the ETW client has requested type information). +#define ETW_HEAP_WALK_HOLDER(__fShouldWalkHeapRootsForEtw, __fShouldWalkHeapObjectsForEtw) \ + CrstHolderWithState __crstHolderWithState(ETW::TypeSystemLog::GetHashCrst(), ((__fShouldWalkHeapRootsForEtw) || (__fShouldWalkHeapObjectsForEtw))) + +#else + +// For ETM, we rely on DTrace to do the checking +#define ETW_PROVIDER_ENABLED(ProviderSymbol) TRUE +#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrInstanceId) 0 + +#endif // FEATURE_EVENT_TRACE && !FEATURE_PAL + +#endif // !FEATURE_REDHAWK +// These parts of the ETW namespace are common for both FEATURE_REDHAWK and +// !FEATURE_REDHAWK builds. 
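Before the declarations shared by both build flavors, here is a complementary standalone sketch (again illustrative only, not runtime code) of the keyword half of McGenEventProviderEnabled above: a keyword-zero event always passes; otherwise the event must overlap MatchAnyKeyword and contain every bit of MatchAllKeyword. The keyword constants below are made up for the demonstration.

#include <cassert>
#include <cstdint>

static bool KeywordsPermitEvent(uint64_t eventKeyword, uint64_t matchAny, uint64_t matchAll)
{
    // Same shape as the keyword check in McGenEventProviderEnabled.
    return (eventKeyword == 0) ||
           (((eventKeyword & matchAny) != 0) &&
            ((eventKeyword & matchAll) == matchAll));
}

int main()
{
    const uint64_t kGCKeyword   = 0x1;      // hypothetical keyword bit
    const uint64_t kTypeKeyword = 0x80000;  // hypothetical keyword bit

    assert(KeywordsPermitEvent(kGCKeyword, /*MatchAny*/ kGCKeyword, /*MatchAll*/ 0));
    assert(!KeywordsPermitEvent(kTypeKeyword, /*MatchAny*/ kGCKeyword, /*MatchAll*/ 0));           // no MatchAny overlap
    assert(!KeywordsPermitEvent(kGCKeyword, /*MatchAny*/ kGCKeyword, /*MatchAll*/ kTypeKeyword));  // missing a MatchAll bit
    assert(KeywordsPermitEvent(0, /*MatchAny*/ 0, /*MatchAll*/ 0xFF));                             // keyword-zero always passes
    return 0;
}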
+ + +struct ProfilingScanContext; +struct ProfilerWalkHeapContext; +class Object; + +namespace ETW +{ + // Class to wrap the logging of threads (runtime and rundown providers) + class ThreadLog + { + private: + static DWORD GetEtwThreadFlags(Thread * pThread); + + public: + static void FireThreadCreated(Thread * pThread); + static void FireThreadDC(Thread * pThread); + }; +}; + +#ifndef FEATURE_REDHAWK + +#ifdef FEATURE_EVENT_TRACE + +// +// Use this macro at the least before calling the Event Macros +// + +#define ETW_TRACING_INITIALIZED(RegHandle) \ + (g_pEtwTracer && RegHandle) + +// +// Use this macro to check if an event is enabled +// if the fields in the event are not cheap to calculate +// +#define ETW_EVENT_ENABLED(Context, EventDescriptor) \ + (MCGEN_ENABLE_CHECK(Context, EventDescriptor)) + +// +// Use this macro to check if a category of events is enabled +// + +#define ETW_CATEGORY_ENABLED(Context, Level, Keyword) \ + (Context.IsEnabled && McGenEventProviderEnabled(&Context, Level, Keyword)) + + + +// +// Special Handling of Startup events +// + +#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) && !defined(WINXP_AND_WIN2K3_BUILD_SUPPORT) +// "mc.exe -MOF" already generates this block for XP-suported builds inside ClrEtwAll.h; +// on Vista+ builds, mc is run without -MOF, and we still have code that depends on it, so +// we manually place it here. +ETW_INLINE +ULONG +CoMofTemplate_h( + __in REGHANDLE RegHandle, + __in PCEVENT_DESCRIPTOR Descriptor, + __in_opt LPCGUID EventGuid, + __in const unsigned short ClrInstanceID + ) +{ +#define ARGUMENT_COUNT_h 1 + ULONG Error = ERROR_SUCCESS; +typedef struct _MCGEN_TRACE_BUFFER { + EVENT_TRACE_HEADER Header; + EVENT_DATA_DESCRIPTOR EventData[ARGUMENT_COUNT_h]; +} MCGEN_TRACE_BUFFER; + + MCGEN_TRACE_BUFFER TraceBuf; + PEVENT_DATA_DESCRIPTOR EventData = TraceBuf.EventData; + + EventDataDescCreate(&EventData[0], &ClrInstanceID, sizeof(const unsigned short) ); + + + { + Error = EventWrite(RegHandle, Descriptor, ARGUMENT_COUNT_h, EventData); + + } + +#ifdef MCGEN_CALLOUT +MCGEN_CALLOUT(RegHandle, + Descriptor, + ARGUMENT_COUNT_h, + EventData); +#endif + + return Error; +} +#endif // defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_PAL) && !defined(WINXP_AND_WIN2K3_BUILD_SUPPORT) + +class ETWTraceStartup { +#ifndef FEATURE_DTRACE + REGHANDLE TraceHandle; + PCEVENT_DESCRIPTOR EventStartDescriptor; + LPCGUID EventStartGuid; + PCEVENT_DESCRIPTOR EventEndDescriptor; + LPCGUID EventEndGuid; +public: + ETWTraceStartup(REGHANDLE _TraceHandle, PCEVENT_DESCRIPTOR _EventStartDescriptor, LPCGUID _EventStartGuid, PCEVENT_DESCRIPTOR _EventEndDescriptor, LPCGUID _EventEndGuid) { + TraceHandle = _TraceHandle; + EventStartDescriptor = _EventStartDescriptor; + EventEndDescriptor = _EventEndDescriptor; + EventStartGuid = _EventStartGuid; + EventEndGuid = _EventEndGuid; + StartupTraceEvent(TraceHandle, EventStartDescriptor, EventStartGuid); + } + ~ETWTraceStartup() { + StartupTraceEvent(TraceHandle, EventEndDescriptor, EventEndGuid); + } + static void StartupTraceEvent(REGHANDLE _TraceHandle, PCEVENT_DESCRIPTOR _EventDescriptor, LPCGUID _EventGuid) { + EVENT_DESCRIPTOR desc = *_EventDescriptor; + if(ETW_TRACING_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, desc)) + { +#ifndef FEATURE_PAL + CoMofTemplate_h(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context.RegistrationHandle, _EventDescriptor, _EventGuid, GetClrInstanceId()); +#endif // !FEATURE_PAL + } + } +#else //!FEATURE_DTRACE + void (*startFP)(); + void (*endFP)(); 
+public: + ETWTraceStartup(void (*sFP)(), void (*eFP)()) : startFP (sFP), endFP(eFP) { + (*startFP)(); + } + ~ETWTraceStartup() { + (*endFP)(); + } +#endif //!FEATURE_DTRACE +}; + + + +#else // FEATURE_EVENT_TRACE + +#define ETWOnStartup(StartEventName, EndEventName) +#define ETWFireEvent(EventName) + +// Use this macro at the least before calling the Event Macros +#define ETW_TRACING_INITIALIZED(RegHandle) (FALSE) + +// Use this macro to check if an event is enabled +// if the fields in the event are not cheap to calculate +#define ETW_EVENT_ENABLED(Context, EventDescriptor) (FALSE) + +// Use this macro to check if a category of events is enabled +#define ETW_CATEGORY_ENABLED(Context, Level, Keyword) (FALSE) + +// Use this macro to check if ETW is initialized and the event is enabled +#define ETW_TRACING_ENABLED(Context, EventDescriptor) (FALSE) + +// Use this macro to check if ETW is initialized and the category is enabled +#define ETW_TRACING_CATEGORY_ENABLED(Context, Level, Keyword) (FALSE) + +#endif // FEATURE_EVENT_TRACE + +#endif // FEATURE_REDHAWK + +#endif //_ETWTRACER_HXX_ diff --git a/src/coreclr/src/nativeaot/Runtime/eventtracepriv.h b/src/coreclr/src/nativeaot/Runtime/eventtracepriv.h new file mode 100644 index 0000000000000..c97eb9fc37f3a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/eventtracepriv.h @@ -0,0 +1,213 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// +// File: eventtracepriv.h +// +// Contains some private definitions used by eventrace.cpp, but that aren't needed by +// clients of eventtrace.cpp, and thus don't belong in eventtrace.h. Also, since +// inclusions of this file are tightly controlled (basically just by eventtrace.cpp), we +// can assume some classes are defined that aren't necessarily defined when eventtrace.h +// is #included (e.g., StackSString and StackSArray). +// +// ============================================================================ + +#ifndef __EVENTTRACEPRIV_H__ +#define __EVENTTRACEPRIV_H__ + +#ifdef FEATURE_REDHAWK +#include "holder.h" +#endif // FEATURE_REDHAWK + +#ifndef _countof +#define _countof(_array) (sizeof(_array)/sizeof(_array[0])) +#endif + +const UINT cbMaxEtwEvent = 64 * 1024; + +//--------------------------------------------------------------------------------------- +// C++ copies of ETW structures +//--------------------------------------------------------------------------------------- + +// !!!!!!! NOTE !!!!!!!! +// The EventStruct* structs are described in the ETW manifest event templates, and the +// LAYOUT MUST MATCH THE MANIFEST EXACTLY! +// !!!!!!! NOTE !!!!!!!! + +#pragma pack(push, 1) + +struct EventStructGCBulkRootEdgeValue +{ + LPVOID RootedNodeAddress; + BYTE GCRootKind; + DWORD GCRootFlag; + LPVOID GCRootID; +}; + +struct EventStructGCBulkRootConditionalWeakTableElementEdgeValue +{ + LPVOID GCKeyNodeID; + LPVOID GCValueNodeID; + LPVOID GCRootID; +}; + +struct EventStructGCBulkNodeValue +{ + LPVOID Address; + ULONGLONG Size; + ULONGLONG TypeID; + ULONGLONG EdgeCount; +}; + +struct EventStructGCBulkEdgeValue +{ + LPVOID Value; + ULONG ReferencingFieldID; +}; + +struct EventStructGCBulkSurvivingObjectRangesValue +{ + LPVOID RangeBase; + ULONGLONG RangeLength; +}; + +struct EventStructGCBulkMovedObjectRangesValue +{ + LPVOID OldRangeBase; + LPVOID NewRangeBase; + ULONGLONG RangeLength; +}; + +// This only contains the fixed-size data at the top of each struct in +// the bulk type event. 
These fields must still match exactly the initial +// fields of the struct described in the manifest. +struct EventStructBulkTypeFixedSizedData +{ + ULONGLONG TypeID; + ULONGLONG ModuleID; + ULONG TypeNameID; + ULONG Flags; + BYTE CorElementType; +}; + +#pragma pack(pop) + + + +// Represents one instance of the Value struct inside a single BulkType event +class BulkTypeValue +{ +public: + BulkTypeValue(); + void Clear(); + + // How many bytes will this BulkTypeValue take up when written into the actual ETW + // event? + int GetByteCountInEvent() + { + return + sizeof(fixedSizedData) + + sizeof(cTypeParameters) + +#ifdef FEATURE_REDHAWK + sizeof(WCHAR) + // No name in event, so just the null terminator + cTypeParameters * sizeof(ULONGLONG); // Type parameters +#else + (sName.GetCount() + 1) * sizeof(WCHAR) + // Size of name, including null terminator + rgTypeParameters.GetCount() * sizeof(ULONGLONG);// Type parameters +#endif + } + + EventStructBulkTypeFixedSizedData fixedSizedData; + + // Below are the remainder of each struct in the bulk type event (i.e., the + // variable-sized data). The var-sized fields are copied into the event individually + // (not directly), so they don't need to have the same layout as in the ETW manifest + + // This is really a denorm of the size already stored in rgTypeParameters, but we + // need a persistent place to stash this away so EventDataDescCreate & EventWrite + // have a reliable place to copy it from. This is filled in at the last minute, + // when sending the event. (On ProjectN, which doesn't have StackSArray, this is + // filled in earlier and used in more places.) + ULONG cTypeParameters; + +#ifdef FEATURE_REDHAWK + // If > 1 type parameter, this is an array of their EEType*'s + NewArrayHolder rgTypeParameters; + + // If exactly one type parameter, this is its EEType*. (If != 1 type parameter, + // this is 0.) + ULONGLONG ullSingleTypeParameter; +#else // FEATURE_REDHAWK + StackSString sName; + StackSArray rgTypeParameters; +#endif // FEATURE_REDHAWK +}; + +// Encapsulates all the type event batching we need to do. This is used by +// ETW::TypeSystemLog, which calls LogTypeAndParameters for each type to be logged. +// BulkTypeEventLogger will batch each type and its generic type parameters, and flush to +// ETW as necessary. ETW::TypeSystemLog also calls FireBulkTypeEvent directly to force a +// flush (e.g., once at end of GC heap traversal, or on each object allocation). +class BulkTypeEventLogger +{ +private: + + // Estimate of how many bytes we can squeeze in the event data for the value struct + // array. (Intentionally overestimate the size of the non-array parts to keep it safe.) + static const int kMaxBytesTypeValues = (cbMaxEtwEvent - 0x30); + + // Estimate of how many type value elements we can put into the struct array, while + // staying under the ETW event size limit. Note that this is impossible to calculate + // perfectly, since each element of the struct array has variable size. + // + // In addition to the byte-size limit per event, Windows always forces on us a + // max-number-of-descriptors per event, which in the case of BulkType, will kick in + // far sooner. There's a max number of 128 descriptors allowed per event. 2 are used + // for Count + ClrInstanceID. Then 4 per batched value. (Might actually be 3 if there + // are no type parameters to log, but let's overestimate at 4 per value). + static const int kMaxCountTypeValues = (128 - 2) / 4; + // Note: This results in a relatively small batch (about 31 types per event). 
We + // could increase this substantially by creating a single, contiguous buffer, which + // would let us max out the number of type values to batch by allowing the byte-size + // limit to kick in before the max-descriptor limit. We could esimate that as + // follows: + // + // static const int kMaxCountTypeValues = kMaxBytesTypeValues / + // (sizeof(EventStructBulkTypeFixedSizedData) + + // 200 * sizeof(WCHAR) + // Assume 199 + 1 terminating-NULL character in type name + // sizeof(UINT) + // Type parameter count + // 10 * sizeof(ULONGLONG)); // Assume 10 type parameters + // + // The downside, though, is that we would have to do a lot more copying to fill out + // that buffer before sending the event. It's unclear that increasing the batch size + // is enough of a win to offset all the extra buffer copying. So for now, we'll keep + // the batch size low and avoid extra copying. + + // How many types have we batched? + int m_nBulkTypeValueCount; + + // What is the byte size of all the types we've batched? + int m_nBulkTypeValueByteCount; + + // List of types we've batched. + BulkTypeValue m_rgBulkTypeValues[kMaxCountTypeValues]; + +#ifdef FEATURE_REDHAWK + int LogSingleType(EEType * pEEType); +#else + int LogSingleType(TypeHandle th); +#endif + +public: + BulkTypeEventLogger() : + m_nBulkTypeValueCount(0), + m_nBulkTypeValueByteCount(0) + { + LIMITED_METHOD_CONTRACT; + } + + void LogTypeAndParameters(ULONGLONG thAsAddr, ETW::TypeSystemLog::TypeLogBehavior typeLogBehavior); + void FireBulkTypeEvent(); + void Cleanup(); +}; + +#endif // __EVENTTRACEPRIV_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/forward_declarations.h b/src/coreclr/src/nativeaot/Runtime/forward_declarations.h new file mode 100644 index 0000000000000..4fa221acff928 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/forward_declarations.h @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file may be included by header files to forward declare common +// public types. The intent here is that .CPP files should need to +// include fewer header files. + +#define FWD_DECL(x) \ + class x; \ + typedef DPTR(x) PTR_##x; + +// rtu +FWD_DECL(AllocHeap) +FWD_DECL(CObjectHeader) +FWD_DECL(CLREventStatic) +FWD_DECL(CrstHolder) +FWD_DECL(CrstStatic) +FWD_DECL(EEMethodInfo) +FWD_DECL(EECodeManager) +FWD_DECL(EEThreadId) +FWD_DECL(MethodInfo) +FWD_DECL(Module) +FWD_DECL(Object) +FWD_DECL(OBJECTHANDLEHolder) +FWD_DECL(PageEntry) +FWD_DECL(PAL_EnterHolder) +FWD_DECL(PAL_LeaveHolder) +FWD_DECL(SpinLock) +FWD_DECL(RCOBJECTHANDLEHolder) +FWD_DECL(RedhawkGCInterface) +FWD_DECL(RtuObjectRef) +FWD_DECL(RuntimeInstance) +FWD_DECL(StackFrameIterator) +FWD_DECL(SyncClean) +FWD_DECL(SyncState) +FWD_DECL(Thread) +FWD_DECL(ThreadStore) + +#ifdef FEATURE_RWX_MEMORY +namespace rh { + namespace util { + FWD_DECL(MemRange) + FWD_DECL(MemAccessMgr) + FWD_DECL(WriteAccessHolder) + } +} +#endif // FEATURE_RWX_MEMORY + +// inc +FWD_DECL(EEInterfaceInfo) +FWD_DECL(EEType) + diff --git a/src/coreclr/src/nativeaot/Runtime/gcdump.cpp b/src/coreclr/src/nativeaot/Runtime/gcdump.cpp new file mode 100644 index 0000000000000..5bf25316cbc5f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcdump.cpp @@ -0,0 +1,709 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+/***************************************************************************** + * GCDump.cpp + * + * Defines functions to display the GCInfo as defined by the GC-encoding + * spec. The GC information may be either dynamically created by a + * Just-In-Time compiler conforming to the standard code-manager spec, + * or may be persisted by a managed native code compiler conforming + * to the standard code-manager spec. + */ +#include "common.h" + +#if (defined(_DEBUG) || defined(DACCESS_COMPILE)) + +#include "gcenv.h" +#include "varint.h" +#include "gcinfo.h" +#include "gcdump.h" + +/*****************************************************************************/ + +#ifdef DACCESS_COMPILE +static void DacNullPrintf(const char* , ...) {} +#endif + +GCDump::GCDump() +{ +#ifndef DACCESS_COMPILE + // By default, use the standard printf function to dump + GCDump::gcPrintf = (printfFtn) ::printf; +#else + // Default for DAC is a no-op. + GCDump::gcPrintf = DacNullPrintf; +#endif +} + + + +/*****************************************************************************/ + +static const char * const calleeSaveRegMaskBitNumberToName[] = +{ +#if defined(TARGET_X86) + "EBX", + "ESI", + "EDI", + "EBP", +#elif defined(TARGET_AMD64) + "RBX", + "RSI", + "RDI", + "RBP", + "R12", + "R13", + "R14", + "R15" +#elif defined(TARGET_ARM) + "R4", + "R5", + "R6", + "R7", + "R8", + "R9", + "R10", + "R11", + "LR", +#elif defined(TARGET_ARM64) + "LR", + "X19", + "X20", + "X21", + "X22", + "X23", + "X24", + "X25", + "X26", + "X27", + "X28", + "FP", +#else +#error unknown architecture +#endif +}; + +char const * GetReturnKindString(GCInfoHeader::MethodReturnKind returnKind) +{ + switch (returnKind) + { + case GCInfoHeader::MRK_ReturnsScalar: return "scalar"; + case GCInfoHeader::MRK_ReturnsObject: return "object"; + case GCInfoHeader::MRK_ReturnsByref: return "byref"; + case GCInfoHeader::MRK_ReturnsToNative: return "native"; +#if defined(TARGET_ARM64) + case GCInfoHeader::MRK_Scalar_Obj: return "{scalar, object}"; + case GCInfoHeader::MRK_Obj_Obj: return "{object, object}"; + case GCInfoHeader::MRK_Byref_Obj: return "{byref, object}"; + case GCInfoHeader::MRK_Scalar_Byref: return "{scalar, byref}"; + case GCInfoHeader::MRK_Obj_Byref: return "{object, byref}"; + case GCInfoHeader::MRK_Byref_Byref: return "{byref, byref}"; +#endif // defined(TARGET_ARM64) + default: return "???"; + } +} + +char const * GetFramePointerRegister() +{ +#if defined(TARGET_X86) + return "EBP"; +#elif defined(TARGET_AMD64) + return "RBP"; +#elif defined(TARGET_ARM) + return "R7"; +#elif defined(TARGET_ARM64) + return "FP"; +#else +#error unknown architecture +#endif +} + +char const * GetStackPointerRegister() +{ +#if defined(TARGET_X86) + return "ESP"; +#elif defined(TARGET_AMD64) + return "RSP"; +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) + return "SP"; +#else +#error unknown architecture +#endif +} + +size_t FASTCALL GCDump::DumpInfoHeader (PTR_UInt8 gcInfo, + Tables * pTables, + GCInfoHeader * pHeader /* OUT */ + ) +{ + size_t headerSize = 0; + PTR_UInt8 gcInfoStart = gcInfo; + PTR_UInt8 pbStackChanges = 0; + PTR_UInt8 pbUnwindInfo = 0; + + unsigned unwindInfoBlobOffset = VarInt::ReadUnsigned(gcInfo); + bool inlineUnwindInfo = (unwindInfoBlobOffset == 0); + + if (inlineUnwindInfo) + { + // it is inline.. + pbUnwindInfo = gcInfo; + } + else + { + // The offset was adjusted by 1 to reserve the 0 encoding for the inline case, so we re-adjust it to + // the actual offset here. 
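+        // (So an encoded value of 1 refers to byte 0 of the unwind info blob, 2 to byte 1, and so on;
+        // an encoded 0 never reaches this branch, because 0 always means the unwind info is stored inline, handled above.)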
+ pbUnwindInfo = pTables->pbUnwindInfoBlob + unwindInfoBlobOffset - 1; + } + + // @TODO: decode all funclet headers as well. + pbStackChanges = pHeader->DecodeHeader(0, pbUnwindInfo, &headerSize ); + + if (inlineUnwindInfo) + gcInfo += headerSize; + + unsigned epilogCount = pHeader->GetEpilogCount(); + bool epilogAtEnd = pHeader->IsEpilogAtEnd(); + + gcPrintf(" prologSize: %d\n", pHeader->GetPrologSize()); + if (pHeader->HasVaryingEpilogSizes()) + gcPrintf(" epilogSize: (varies)\n"); + else + gcPrintf(" epilogSize: %d\n", pHeader->GetFixedEpilogSize()); + + gcPrintf(" epilogCount: %d %s\n", epilogCount, epilogAtEnd ? "[end]" : ""); + gcPrintf(" returnKind: %s\n", GetReturnKindString(pHeader->GetReturnKind())); + gcPrintf(" frameKind: %s", pHeader->HasFramePointer() ? GetFramePointerRegister() : GetStackPointerRegister()); +#ifdef TARGET_AMD64 + if (pHeader->HasFramePointer()) + gcPrintf(" offset: %d", pHeader->GetFramePointerOffset()); +#endif // HOST_AMD64 + gcPrintf("\n"); + gcPrintf(" frameSize: %d\n", pHeader->GetFrameSize()); + + if (pHeader->HasDynamicAlignment()) { + gcPrintf(" alignment: %d\n", (1 << pHeader->GetDynamicAlignment())); + if (pHeader->GetParamPointerReg() != RN_NONE) { + gcPrintf(" paramReg: %d\n", pHeader->GetParamPointerReg()); + } + } + + gcPrintf(" savedRegs: "); + CalleeSavedRegMask savedRegs = pHeader->GetSavedRegs(); + CalleeSavedRegMask mask = (CalleeSavedRegMask) 1; + for (int i = 0; i < RBM_CALLEE_SAVED_REG_COUNT; i++) + { + if (savedRegs & mask) + { + gcPrintf("%s ", calleeSaveRegMaskBitNumberToName[i]); + } + mask = (CalleeSavedRegMask)(mask << 1); + } + gcPrintf("\n"); + +#ifdef TARGET_ARM + gcPrintf(" parmRegsPushedCount: %d\n", pHeader->ParmRegsPushedCount()); +#endif + +#ifdef TARGET_X86 + gcPrintf(" returnPopSize: %d\n", pHeader->GetReturnPopSize()); + if (pHeader->HasStackChanges()) + { + // @TODO: need to read the stack changes string that follows + ASSERT(!"NYI -- stack changes for ESP frames"); + } +#endif + + if (pHeader->ReturnsToNative()) + { + gcPrintf(" reversePinvokeFrameOffset: 0x%02x\n", pHeader->GetReversePinvokeFrameOffset()); + } + + + if (!epilogAtEnd && !pHeader->IsFunclet()) + { + gcPrintf(" epilog offsets: "); + unsigned previousOffset = 0; + for (unsigned idx = 0; idx < epilogCount; idx++) + { + unsigned newOffset = previousOffset + VarInt::ReadUnsigned(gcInfo); + gcPrintf("0x%04x ", newOffset); + if (pHeader->HasVaryingEpilogSizes()) + gcPrintf("(%u bytes) ", VarInt::ReadUnsigned(gcInfo)); + previousOffset = newOffset; + } + gcPrintf("\n"); + } + + return gcInfo - gcInfoStart; +} + +// TODO: Can we unify this code with ReportLocalSlot in RHCodeMan.cpp? 
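+// Prints a single tracked local, e.g. "local slot 0n3, [RBP+18]" on AMD64 with a frame pointer
+// (an illustrative line, not taken from a real dump); the base register and the sign/offset
+// computation are architecture-specific below.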
+void GCDump::PrintLocalSlot(UInt32 slotNum, GCInfoHeader const * pHeader) +{ + char const * baseReg; + Int32 offset; + + if (pHeader->HasFramePointer()) + { + baseReg = GetFramePointerRegister(); +#ifdef TARGET_ARM + offset = pHeader->GetFrameSize() - ((slotNum + 1) * POINTER_SIZE); +#elif defined(TARGET_ARM64) + if (pHeader->AreFPLROnTop()) + { + offset = -(Int32)((slotNum + 1) * POINTER_SIZE); + } + else + { + offset = (slotNum + 2) * POINTER_SIZE; + } +#elif defined(TARGET_X86) + offset = -pHeader->GetPreservedRegsSaveSize() - (slotNum * POINTER_SIZE); +#elif defined(TARGET_AMD64) + if (pHeader->GetFramePointerOffset() == 0) + { + offset = -pHeader->GetPreservedRegsSaveSize() - (slotNum * POINTER_SIZE); + } + else + { + offset = (slotNum * POINTER_SIZE); + } +#else +#error unknown architecture +#endif + } + else + { + baseReg = GetStackPointerRegister(); + offset = pHeader->GetFrameSize() - ((slotNum + 1) * POINTER_SIZE); + } + + char const * sign = "+"; + if (offset < 0) + { + sign = "-"; + offset = -offset; + } + gcPrintf("local slot 0n%d, [%s%s%02X]\n", slotNum, baseReg, sign, offset); +} + +// Reads a 7-bit-encoded register mask: +// - 0RRRRRRR for non-ARM64 registers and { x0-x6 } ARM64 registers +// - 1RRRRRRR 0RRRRRRR for { x0-x13 } ARM64 registers +// - 1RRRRRRR 1RRRRRRR 000RRRRR for { x0-x15, xip0, xip1, lr } ARM64 registers +// Returns the number of bytes read. +size_t ReadRegisterMaskBy7Bit(PTR_UInt8 pCursor, UInt32* pMask) +{ + UInt32 byte0 = *pCursor; + if (!(byte0 & 0x80)) + { + *pMask = byte0; + return 1; + } + +#if defined(TARGET_ARM64) + UInt32 byte1 = *(pCursor + 1); + if (!(byte1 & 0x80)) + { + // XOR with 0x80 discards the most significant bit of byte0 + *pMask = (byte1 << 7) ^ byte0 ^ 0x80; + return 2; + } + + UInt32 byte2 = *(pCursor + 2); + if (!(byte2 & 0x80)) + { + // XOR with 0x4080 discards the most significant bits of byte0 and byte1 + *pMask = (byte2 << 14) ^ (byte1 << 7) ^ byte0 ^ 0x4080; + return 3; + } +#endif + + UNREACHABLE_MSG("Register mask is too long"); +} + +void GCDump::DumpCallsiteString(UInt32 callsiteOffset, PTR_UInt8 pbCallsiteString, + GCInfoHeader const * pHeader) +{ + gcPrintf("%04x: ", callsiteOffset); + + int count = 0; + UInt8 b; + PTR_UInt8 pCursor = pbCallsiteString; + + bool last = false; + bool first = true; + + do + { + if (!first) + gcPrintf(" "); + + first = false; + + b = *pCursor++; + last = ((b & 0x20) == 0x20); + + switch (b & 0xC0) + { + case 0x00: + { + // case 2 -- "register set" + gcPrintf("%02x | 2 ", b); +#ifdef TARGET_ARM + if (b & CSR_MASK_R4) { gcPrintf("R4 "); count++; } + if (b & CSR_MASK_R5) { gcPrintf("R5 "); count++; } + if (b & CSR_MASK_R6) { gcPrintf("R6 "); count++; } + if (b & CSR_MASK_R7) { gcPrintf("R7 "); count++; } + if (b & CSR_MASK_R8) { gcPrintf("R8 "); count++; } +#elif defined(TARGET_ARM64) + UInt16 regs = (b & 0xF); + if (b & 0x10) { regs |= (*pCursor++ << 4); } + + ASSERT(!(regs & CSR_MASK_LR)); + if (regs & CSR_MASK_X19) { gcPrintf("X19 "); count++; } + if (regs & CSR_MASK_X20) { gcPrintf("X20 "); count++; } + if (regs & CSR_MASK_X21) { gcPrintf("X21 "); count++; } + if (regs & CSR_MASK_X22) { gcPrintf("X22 "); count++; } + if (regs & CSR_MASK_X23) { gcPrintf("X23 "); count++; } + if (regs & CSR_MASK_X24) { gcPrintf("X24 "); count++; } + if (regs & CSR_MASK_X25) { gcPrintf("X25 "); count++; } + if (regs & CSR_MASK_X26) { gcPrintf("X26 "); count++; } + if (regs & CSR_MASK_X27) { gcPrintf("X27 "); count++; } + if (regs & CSR_MASK_X28) { gcPrintf("X28 "); count++; } + if (regs & CSR_MASK_FP ) { 
gcPrintf("FP " ); count++; } +#elif defined(TARGET_AMD64) + if (b & CSR_MASK_RBX) { gcPrintf("RBX "); count++; } + if (b & CSR_MASK_RSI) { gcPrintf("RSI "); count++; } + if (b & CSR_MASK_RDI) { gcPrintf("RDI "); count++; } + if (b & CSR_MASK_RBP) { gcPrintf("RBP "); count++; } + if (b & CSR_MASK_R12) { gcPrintf("R12 "); count++; } +#elif defined(TARGET_X86) + if (b & CSR_MASK_RBX) { gcPrintf("EBX "); count++; } + if (b & CSR_MASK_RSI) { gcPrintf("ESI "); count++; } + if (b & CSR_MASK_RDI) { gcPrintf("EDI "); count++; } + if (b & CSR_MASK_RBP) { gcPrintf("EBP "); count++; } +#else +#error unknown architecture +#endif + gcPrintf("\n"); + } + break; + + case 0x40: + { + // case 3 -- "register" + const char* regName = "???"; + const char* interior = (b & 0x10) ? "+" : ""; + const char* pinned = (b & 0x08) ? "!" : ""; + + switch (b & 0x7) + { +#ifdef TARGET_ARM + case CSR_NUM_R4: regName = "R4"; break; + case CSR_NUM_R5: regName = "R5"; break; + case CSR_NUM_R6: regName = "R6"; break; + case CSR_NUM_R7: regName = "R7"; break; + case CSR_NUM_R8: regName = "R8"; break; + case CSR_NUM_R9: regName = "R9"; break; + case CSR_NUM_R10: regName = "R10"; break; + case CSR_NUM_R11: regName = "R11"; break; +#elif defined(TARGET_ARM64) + case CSR_NUM_X19: regName = "X19"; break; + case CSR_NUM_X20: regName = "X20"; break; + case CSR_NUM_X21: regName = "X21"; break; + case CSR_NUM_X22: regName = "X22"; break; + case CSR_NUM_X23: regName = "X23"; break; + case CSR_NUM_X24: regName = "X24"; break; + case CSR_NUM_X25: regName = "X25"; break; + case 0: + switch (*pCursor++) + { + case CSR_NUM_X26: regName = "X26"; break; + case CSR_NUM_X27: regName = "X27"; break; + case CSR_NUM_X28: regName = "X28"; break; + case CSR_NUM_FP : regName = "FP" ; break; + } + break; +#elif defined(TARGET_AMD64) + case CSR_NUM_RBX: regName = "RBX"; break; + case CSR_NUM_RSI: regName = "RSI"; break; + case CSR_NUM_RDI: regName = "RDI"; break; + case CSR_NUM_RBP: regName = "RBP"; break; + case CSR_NUM_R12: regName = "R12"; break; + case CSR_NUM_R13: regName = "R13"; break; + case CSR_NUM_R14: regName = "R14"; break; + case CSR_NUM_R15: regName = "R15"; break; +#elif defined(TARGET_X86) + case CSR_NUM_RBX: regName = "EBX"; break; + case CSR_NUM_RSI: regName = "ESI"; break; + case CSR_NUM_RDI: regName = "EDI"; break; + case CSR_NUM_RBP: regName = "EBP"; break; +#else +#error unknown architecture +#endif + } + gcPrintf("%02x | 3 %s%s%s \n", b, regName, interior, pinned); + count++; + } + break; + + case 0x80: + { + if (b & 0x10) + { + // case 4 -- "local slot set" or "common var tail" + if ((b & 0x0f) != 0) + { + gcPrintf("%02x | 4 ", b); + bool isFirst = true; + + int mask = 0x01; + int slotNum = 0; + while (mask <= 0x08) + { + if (b & mask) + { + if (!isFirst) + { + if (!first) + gcPrintf(" "); + gcPrintf(" | "); + } + + PrintLocalSlot(slotNum, pHeader); + + isFirst = false; + count++; + } + mask <<= 1; + slotNum++; + } + } + else + { + unsigned commonVarInx = 0; + if ((b & 0x20) == 0) + commonVarInx = VarInt::ReadUnsigned(pCursor); + + gcPrintf("%02x | 8 set #%04u\n", b, commonVarInx); + } + } + else + { + // case 5 -- "local slot" + int slotNum = (int)(b & 0xF) + 4; + gcPrintf("%02x | 5 ", b); + PrintLocalSlot(slotNum, pHeader); + + count++; + } + } + break; + case 0xC0: + { + if ((b & 0xC7) == 0xC2) + { + // case 7 - live scratch regs + gcPrintf("%02x | 7 ", b); + + UInt32 regs, byrefRegs = 0, pinnedRegs = 0; + pCursor += ReadRegisterMaskBy7Bit(pCursor, ®s); + if (b & 0x10) + pCursor += ReadRegisterMaskBy7Bit(pCursor, &byrefRegs); + 
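+                    // Bit 0x08 of the lead byte signals that a mask of pinned scratch registers
+                    // follows, mirroring the byref mask gated by bit 0x10 above. Illustrative
+                    // decode for ReadRegisterMaskBy7Bit on ARM64: the bytes { 0x81, 0x03 } yield
+                    // the mask 0x181 (bit 0 from the first byte, bits 7 and 8 from the second).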
if (b & 0x08) + pCursor += ReadRegisterMaskBy7Bit(pCursor, &pinnedRegs); + + for (UInt32 reg = 0; ; reg++) + { + UInt32 regMask = (1 << reg); + if (regMask > regs) + break; + + if (regs & regMask) + { + char* pinned = (pinnedRegs & regMask) ? "!" : ""; + char* interior = (byrefRegs & regMask) ? "+" : ""; + char* regStr = "???"; + + switch (reg) + { +#if defined(TARGET_ARM) + case SR_NUM_R0: regStr = "R0"; break; + case SR_NUM_R1: regStr = "R1"; break; + case SR_NUM_R2: regStr = "R2"; break; + case SR_NUM_R3: regStr = "R3"; break; + case SR_NUM_R12: regStr = "R12"; break; + case SR_NUM_LR: regStr = "LR"; break; +#elif defined(TARGET_ARM64) + case SR_NUM_X0: regStr = "X0"; break; + case SR_NUM_X1: regStr = "X1"; break; + case SR_NUM_X2: regStr = "X2"; break; + case SR_NUM_X3: regStr = "X3"; break; + case SR_NUM_X4: regStr = "X4"; break; + case SR_NUM_X5: regStr = "X5"; break; + case SR_NUM_X6: regStr = "X6"; break; + case SR_NUM_X7: regStr = "X7"; break; + case SR_NUM_X8: regStr = "X8"; break; + case SR_NUM_X9: regStr = "X9"; break; + case SR_NUM_X10: regStr = "X10"; break; + case SR_NUM_X11: regStr = "X11"; break; + case SR_NUM_X12: regStr = "X12"; break; + case SR_NUM_X13: regStr = "X13"; break; + case SR_NUM_X14: regStr = "X14"; break; + case SR_NUM_X15: regStr = "X15"; break; + case SR_NUM_XIP0: regStr = "XIP0"; break; + case SR_NUM_XIP1: regStr = "XIP1"; break; + case SR_NUM_LR: regStr = "LR"; break; +#elif defined(TARGET_AMD64) + case SR_NUM_RAX: regStr = "RAX"; break; + case SR_NUM_RCX: regStr = "RCX"; break; + case SR_NUM_RDX: regStr = "RDX"; break; + case SR_NUM_R8: regStr = "R8"; break; + case SR_NUM_R9: regStr = "R9"; break; + case SR_NUM_R10: regStr = "R10"; break; + case SR_NUM_R11: regStr = "R11"; break; +#elif defined(TARGET_X86) + case SR_NUM_RAX: regStr = "EAX"; break; + case SR_NUM_RCX: regStr = "ECX"; break; + case SR_NUM_RDX: regStr = "EDX"; break; +#else +#error unknown architecture +#endif + } + gcPrintf("%s%s%s ", regStr, interior, pinned); + count++; + } + } + } + else + { + // case 6 - stack slot / stack slot set + gcPrintf("%02x ", b); + unsigned mask = 0; + PTR_UInt8 pInts = pCursor; + unsigned offset = VarInt::ReadUnsigned(pCursor); + const char* interior = (b & 0x10) ? "+" : ""; + const char* pinned = (b & 0x08) ? "!" : ""; + const char* baseReg = (b & 0x04) ? GetFramePointerRegister() : GetStackPointerRegister(); + const char* sign = (b & 0x02) ? 
"-" : "+"; + if (b & 0x01) + { + mask = VarInt::ReadUnsigned(pCursor); + } + + int c = 1; + while (pInts != pCursor) + { + gcPrintf("%02x ", *pInts++); + c++; + } + + for (; c < 4; c++) + { + gcPrintf(" "); + } + + gcPrintf("| 6 [%s%s%02X]%s%s\n", baseReg, sign, offset, interior, pinned); + count++; + + while (mask > 0) + { + offset += POINTER_SIZE; + if (mask & 1) + { + if (!first) + gcPrintf(" "); + + gcPrintf(" | [%s%s%02X]%s%s\n", baseReg, sign, offset, interior, pinned); + count++; + } + mask >>= 1; + } + } + } + break; + } + } + while (!last); + + //gcPrintf("\n"); +} + +size_t FASTCALL GCDump::DumpGCTable (PTR_UInt8 gcInfo, + Tables * pTables, + const GCInfoHeader& header) +{ + PTR_UInt8 pCursor = gcInfo; + + if (header.HasCommonVars()) + { + UInt32 commonVarCount = VarInt::ReadUnsigned(pCursor); + for (UInt32 i = 0; i < commonVarCount; i++) + { + VarInt::SkipUnsigned(pCursor); + } + } + + // + // Decode the method GC info + // + // 0ddddccc -- SMALL ENCODING + // + // -- dddd is an index into the delta shortcut table + // -- ccc is an offset into the callsite strings blob + // + // 1ddddddd { info offset } -- BIG ENCODING + // + // -- ddddddd is a 7-bit delta + // -- { info offset } is a variable-length unsigned encoding of the offset into the callsite + // strings blob for this callsite. + // + // 10000000 { delta } -- FORWARDER + // + // -- { delta } is a variable-length unsigned encoding of the offset to the next callsite + // + // 11111111 -- STRING TERMINATOR + // + + UInt32 curOffset = 0; + + for (;;) + { + UInt8 b = *pCursor++; + unsigned infoOffset; + + if (b & 0x80) + { + UInt8 lowBits = (b & 0x7F); + // FORWARDER + if (lowBits == 0) + { + curOffset += VarInt::ReadUnsigned(pCursor); + continue; + } + else + if (lowBits == 0x7F) // STRING TERMINATOR + break; + + // BIG ENCODING + curOffset += lowBits; + infoOffset = VarInt::ReadUnsigned(pCursor); + } + else + { + // SMALL ENCODING + infoOffset = (b & 0x7); + curOffset += pTables->pbDeltaShortcutTable[b >> 3]; + } + + DumpCallsiteString(curOffset, pTables->pbCallsiteInfoBlob + infoOffset, &header); + } + + gcPrintf("-------\n"); + + return 0; +} + +#endif // _DEBUG || DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/gcdump.h b/src/coreclr/src/nativeaot/Runtime/gcdump.h new file mode 100644 index 0000000000000..34617ae2fa1b6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcdump.h @@ -0,0 +1,77 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +/***************************************************************************** + * GCDump.h + * + * Defines functions to display the GCInfo as defined by the GC-encoding + * spec. The GC information may be either dynamically created by a + * Just-In-Time compiler conforming to the standard code-manager spec, + * or may be persisted by a managed native code compiler conforming + * to the standard code-manager spec. 
+ */ + +/*****************************************************************************/ +#ifndef __GCDUMP_H__ +#define __GCDUMP_H__ +/*****************************************************************************/ + +struct GCInfoHeader; + +#ifndef FASTCALL +#define FASTCALL __fastcall +#endif + + +class GCDump +{ +public: + + struct Tables + { + PTR_UInt8 pbDeltaShortcutTable; + PTR_UInt8 pbUnwindInfoBlob; + PTR_UInt8 pbCallsiteInfoBlob; + }; + + + GCDump (); + + /*------------------------------------------------------------------------- + * Dumps the GCInfoHeader to 'stdout' + * gcInfo : Start of the GC info block + * Return value : Size in bytes of the header encoding + */ + + size_t FASTCALL DumpInfoHeader(PTR_UInt8 gcInfo, + Tables * pTables, + GCInfoHeader * header /* OUT */ + ); + + /*------------------------------------------------------------------------- + * Dumps the GC tables to 'stdout' + * gcInfo : Ptr to the start of the table part of the GC info. + * This immediately follows the GCinfo header + * Return value : Size in bytes of the GC table encodings + */ + + size_t FASTCALL DumpGCTable(PTR_UInt8 gcInfo, + Tables * pTables, + const GCInfoHeader& header + ); + + + typedef void (*printfFtn)(const char* fmt, ...); + printfFtn gcPrintf; + + + + //------------------------------------------------------------------------- +protected: + + void PrintLocalSlot(UInt32 slotNum, GCInfoHeader const * pHeader); + void DumpCallsiteString(UInt32 callsiteOffset, PTR_UInt8 pbCallsiteString, GCInfoHeader const * pHeader); +}; + +/*****************************************************************************/ +#endif // __GC_DUMP_H__ +/*****************************************************************************/ diff --git a/src/coreclr/src/nativeaot/Runtime/gcenv.h b/src/coreclr/src/nativeaot/Runtime/gcenv.h new file mode 100644 index 0000000000000..0f00ed4b9984e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcenv.h @@ -0,0 +1,199 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
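+
+// gcenv.h : environment glue that lets the GC sources build inside the Redhawk runtime. It pulls
+// in the headers the GC code expects and supplies small adapter types such as ArrayBase, the
+// MethodTable-over-EEType bridge and the simplified EEConfig defined below.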
+#ifndef __GCENV_H__ +#define __GCENV_H__ + +#define FEATURE_PREMORTEM_FINALIZATION + +#ifdef _MSC_VER +#pragma warning( disable: 4189 ) // 'hp': local variable is initialized but not referenced -- common in GC +#pragma warning( disable: 4127 ) // conditional expression is constant -- common in GC +#endif + +#include +#include +#include +#include +#include + +#include "sal.h" +#include "gcenv.structs.h" +#include "gcenv.interlocked.h" +#include "gcenv.base.h" +#include "gcenv.os.h" + +#include "Crst.h" +#include "event.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "TargetPtrs.h" +#include "eetype.h" +#include "ObjectLayout.h" +#include "rheventtrace.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "gcrhinterface.h" +#include "gcenv.interlocked.inl" + +#include "slist.h" +#include "RWLock.h" +#include "shash.h" +#include "TypeManager.h" +#include "RuntimeInstance.h" +#include "eetype.inl" +#include "volatile.h" + +#include "gcenv.inl" + +#include "stressLog.h" +#ifdef FEATURE_ETW + + #ifndef _INC_WINDOWS + typedef void* LPVOID; + typedef uint32_t UINT; + typedef void* PVOID; + typedef uint64_t ULONGLONG; + typedef uint32_t ULONG; + typedef int64_t LONGLONG; + typedef uint8_t BYTE; + typedef uint16_t UINT16; + #endif // _INC_WINDOWS + + #include "etwevents.h" + #include "eventtrace.h" + +#else // FEATURE_ETW + + #include "etmdummy.h" + #define ETW_EVENT_ENABLED(e,f) false + +#endif // FEATURE_ETW + +#define MAX_LONGPATH 1024 +#define LOG(x) + +#ifndef YieldProcessor +#define YieldProcessor PalYieldProcessor +#endif + +// Adapter for GC's view of Array +class ArrayBase : Array +{ +public: + DWORD GetNumComponents() + { + return m_Length; + } + + static size_t GetOffsetOfNumComponents() + { + return offsetof(ArrayBase, m_Length); + } +}; + +// +// ----------------------------------------------------------------------------------------------------------- +// +// Bridge GC/HandleTable's version of MethodTable to Redhawk's EEType. Neither component tries to access any +// fields of MethodTable directly so this is mostly just a case of providing all the CLR-style accessors they +// need implemented on top of EEType functionality (we can simply recast the 'this' pointer into an EEType +// pointer). +// +// ****** NOTE: Do NOT attempt to add fields or virtual methods to this class! The pointer passed in 'this' +// ****** really does point to an EEType (there's no such thing as a MethodTable structure in RH). 
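+// ****** Every accessor below therefore just recasts 'this' to EEType* and forwards to the
+// ****** equivalent EEType query; any field added here would alias memory that belongs to the EEType.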
+// +class MethodTable +{ +public: + UInt32 GetBaseSize() { return ((EEType*)this)->get_BaseSize(); } + UInt16 GetComponentSize() { return ((EEType*)this)->get_ComponentSize(); } + UInt16 RawGetComponentSize() { return ((EEType*)this)->get_ComponentSize(); } + UInt32 ContainsPointers() { return ((EEType*)this)->HasReferenceFields(); } + UInt32 ContainsPointersOrCollectible() { return ((EEType*)this)->HasReferenceFields(); } + UInt32_BOOL HasComponentSize() const { return TRUE; } +#ifdef FEATURE_PREMORTEM_FINALIZATION + UInt32_BOOL HasFinalizer() { return ((EEType*)this)->HasFinalizer(); } + UInt32_BOOL HasCriticalFinalizer() { return FALSE; } +#endif // FEATURE_PREMORTEM_FINALIZATION +#ifdef FEATURE_STRUCTALIGN +#ifdef FEATURE_BARTOK + UInt32 GetRequiredAlignment() const { return ((EEType*)this)->get_BaseAlignment(); } +#else // FEATURE_BARTOK + UInt32 GetRequiredAlignment() const { return sizeof(void*); } +#endif // FEATURE_BARTOK +#endif // FEATURE_STRUCTALIGN + bool RequiresAlign8() { return ((EEType*)this)->RequiresAlign8(); } + bool IsValueType() { return ((EEType*)this)->get_IsValueType(); } + UInt32_BOOL SanityCheck() { return ((EEType*)this)->Validate(); } +}; + +class EEConfig +{ + UInt8 m_gcStressMode; + +public: + enum HeapVerifyFlags { + HEAPVERIFY_NONE = 0, + HEAPVERIFY_GC = 1, // Verify the heap at beginning and end of GC + HEAPVERIFY_BARRIERCHECK = 2, // Verify the brick table + HEAPVERIFY_SYNCBLK = 4, // Verify sync block scanning + + // the following options can be used to mitigate some of the overhead introduced + // by heap verification. some options might cause heap verifiction to be less + // effective depending on the scenario. + + HEAPVERIFY_NO_RANGE_CHECKS = 0x10, // Excludes checking if an OBJECTREF is within the bounds of the managed heap + HEAPVERIFY_NO_MEM_FILL = 0x20, // Excludes filling unused segment portions with fill pattern + HEAPVERIFY_POST_GC_ONLY = 0x40, // Performs heap verification post-GCs only (instead of before and after each GC) + HEAPVERIFY_DEEP_ON_COMPACT = 0x80 // Performs deep object verfication only on compacting GCs. + }; + + enum GCStressFlags { + GCSTRESS_NONE = 0, + GCSTRESS_ALLOC = 1, // GC on all allocs and 'easy' places + GCSTRESS_TRANSITION = 2, // GC on transitions to preemtive GC + GCSTRESS_INSTR_JIT = 4, // GC on every allowable JITed instr + GCSTRESS_INSTR_NGEN = 8, // GC on every allowable NGEN instr + GCSTRESS_UNIQUE = 16, // GC only on a unique stack trace + }; + + // This is treated like a constructor--it is not allowed to fail. We have it like this because we don't + // have a CRT to run a static constructor for us. For now, at least, we don't want to do any heavy-weight + // snooping of the environment to control any of these settings, so don't add any code like that here. + void Construct() + { + m_gcStressMode = GCSTRESS_NONE; + } + + GCStressFlags GetGCStressLevel() const { return (GCStressFlags) m_gcStressMode; } + void SetGCStressLevel(int val) { m_gcStressMode = (UInt8) val;} + + bool GetGCAllowVeryLargeObjects () const { return true; } + + // We need conservative GC enabled for some edge cases around ICastable support. This doesn't have much + // impact, it just makes the GC slightly more flexible in dealing with interior references (e.g. we can + // conservatively report an interior reference inside a GC free object or in the non-valid tail of the + // heap). 
+ bool GetGCConservative() const { return true; } +}; +extern EEConfig* g_pConfig; + +EXTERN_C UInt32 _tls_index; +inline UInt16 GetClrInstanceId() +{ + return (UInt16)_tls_index; +} + +class IGCHeap; +typedef DPTR(IGCHeap) PTR_IGCHeap; +typedef DPTR(uint32_t) PTR_uint32_t; + +enum CLRDataEnumMemoryFlags : int; + +/* _TRUNCATE */ +#if !defined (_TRUNCATE) +#define _TRUNCATE ((size_t)-1) +#endif /* !defined (_TRUNCATE) */ + +#endif // __GCENV_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/gchandleutilities.h b/src/coreclr/src/nativeaot/Runtime/gchandleutilities.h new file mode 100644 index 0000000000000..5b994601888e2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gchandleutilities.h @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _GCHANDLEUTILITIES_H_ +#define _GCHANDLEUTILITIES_H_ + +#include "gcinterface.h" + +extern "C" IGCHandleManager* g_pGCHandleManager; + +class GCHandleUtilities +{ +public: + // Retrieves the GC handle table. + static IGCHandleManager* GetGCHandleManager() + { + LIMITED_METHOD_CONTRACT; + + assert(g_pGCHandleManager != nullptr); + return g_pGCHandleManager; + } + +private: + // This class should never be instantiated. + GCHandleUtilities() = delete; +}; + +// Given a handle, returns an OBJECTREF for the object it refers to. +inline OBJECTREF ObjectFromHandle(OBJECTHANDLE handle) +{ + _ASSERTE(handle); + + // Wrap the raw OBJECTREF and return it + return UNCHECKED_OBJECTREF_TO_OBJECTREF(*PTR_UNCHECKED_OBJECTREF(handle)); +} + +#endif // _GCHANDLEUTILITIES_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/gcheaputilities.cpp b/src/coreclr/src/nativeaot/Runtime/gcheaputilities.cpp new file mode 100644 index 0000000000000..9e924b2d3d2d7 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcheaputilities.cpp @@ -0,0 +1,79 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "gcenv.h" +#include "gcheaputilities.h" +#include "gchandleutilities.h" + +#include "gceventstatus.h" + +// This is the global GC heap, maintained by the VM. +GPTR_IMPL(IGCHeap, g_pGCHeap); + +// These globals are variables used within the GC and maintained +// by the EE for use in write barriers. It is the responsibility +// of the GC to communicate updates to these globals to the EE through +// GCToEEInterface::StompWriteBarrier. +GPTR_IMPL_INIT(uint32_t, g_card_table, nullptr); +GPTR_IMPL_INIT(uint8_t, g_lowest_address, nullptr); +GPTR_IMPL_INIT(uint8_t, g_highest_address, nullptr); +GVAL_IMPL_INIT(GCHeapType, g_heap_type, GC_HEAP_INVALID); +uint8_t* g_ephemeral_low = (uint8_t*)1; +uint8_t* g_ephemeral_high = (uint8_t*)~0; + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +uint32_t* g_card_bundle_table = nullptr; +#endif + +IGCHandleManager* g_pGCHandleManager = nullptr; + +GcDacVars g_gc_dac_vars; +GPTR_IMPL(GcDacVars, g_gcDacGlobals); + +// GC entrypoints for the the linked-in GC. These symbols are invoked +// directly if we are not using a standalone GC. +extern "C" HRESULT GC_Initialize( + /* In */ IGCToCLR* clrToGC, + /* Out */ IGCHeap** gcHeap, + /* Out */ IGCHandleManager** gcHandleManager, + /* Out */ GcDacVars* gcDacVars +); + +#ifndef DACCESS_COMPILE + +// Initializes a non-standalone GC. 
The protocol for initializing a non-standalone GC +// is similar to loading a standalone one, except that the GC_VersionInfo and +// GC_Initialize symbols are linked to directory and thus don't need to be loaded. +// +HRESULT GCHeapUtilities::InitializeDefaultGC() +{ + // we should only call this once on startup. Attempting to load a GC + // twice is an error. + assert(g_pGCHeap == nullptr); + + IGCHeap* heap; + IGCHandleManager* manager; + HRESULT initResult = GC_Initialize(nullptr, &heap, &manager, &g_gc_dac_vars); + if (initResult == S_OK) + { + g_pGCHeap = heap; + g_pGCHandleManager = manager; + g_gcDacGlobals = &g_gc_dac_vars; + LOG((LF_GC, LL_INFO100, "GC load successful\n")); + } + else + { + LOG((LF_GC, LL_FATALERROR, "GC initialization failed with HR = 0x%X\n", initResult)); + } + + return initResult; +} + +void GCHeapUtilities::RecordEventStateChange(bool isPublicProvider, GCEventKeyword keywords, GCEventLevel level) +{ + // CoreRT does not support standalone GC. Call GCEventStatus directly to keep things simple. + GCEventStatus::Set(isPublicProvider ? GCEventProvider_Default : GCEventProvider_Private, keywords, level); +} + +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/gcheaputilities.h b/src/coreclr/src/nativeaot/Runtime/gcheaputilities.h new file mode 100644 index 0000000000000..75e0a14a79619 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcheaputilities.h @@ -0,0 +1,99 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _GCHEAPUTILITIES_H_ +#define _GCHEAPUTILITIES_H_ + +#include "gcinterface.h" +#include "daccess.h" + +// The singular heap instance. +GPTR_DECL(IGCHeap, g_pGCHeap); + +#ifndef DACCESS_COMPILE +extern "C" { +#endif // !DACCESS_COMPILE +GPTR_DECL(uint8_t,g_lowest_address); +GPTR_DECL(uint8_t,g_highest_address); +GPTR_DECL(uint32_t,g_card_table); +GVAL_DECL(GCHeapType, g_heap_type); +#ifndef DACCESS_COMPILE +} +#endif // !DACCESS_COMPILE + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES +extern "C" uint32_t* g_card_bundle_table; +#endif // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + +extern "C" uint8_t* g_ephemeral_low; +extern "C" uint8_t* g_ephemeral_high; + +// g_gc_dac_vars is a structure of pointers to GC globals that the +// DAC uses. It is not exposed directly to the DAC. +extern GcDacVars g_gc_dac_vars; + +// Instead of exposing g_gc_dac_vars to the DAC, a pointer to it +// is exposed here (g_gcDacGlobals). The reason for this is to avoid +// a problem in which a debugger attaches to a program while the program +// is in the middle of initializing the GC DAC vars - if the "publishing" +// of DAC vars isn't atomic, the debugger could see a partially initialized +// GcDacVars structure. +// +// Instead, the debuggee "publishes" GcDacVars by assigning a pointer to g_gc_dac_vars +// to this global, and the DAC will read this global. +typedef DPTR(GcDacVars) PTR_GcDacVars; +GPTR_DECL(GcDacVars, g_gcDacGlobals); + +// GCHeapUtilities provides a number of static methods +// that operate on the global heap instance. It can't be +// instantiated. +class GCHeapUtilities { +public: + // Retrieves the GC heap. + inline static IGCHeap* GetGCHeap() + { + assert(g_pGCHeap != nullptr); + return g_pGCHeap; + } + + // Returns true if the heap has been initialized, false otherwise. 
+ inline static bool IsGCHeapInitialized() + { + return g_pGCHeap != nullptr; + } + + // Returns true if a the heap is initialized and a garbage collection + // is in progress, false otherwise. + inline static BOOL IsGCInProgress(BOOL bConsiderGCStart = FALSE) + { + return GetGCHeap()->IsGCInProgressHelper(bConsiderGCStart); + } + + // Returns true if the held GC heap is a Server GC heap, false otherwise. + inline static bool IsServerHeap() + { + LIMITED_METHOD_CONTRACT; + +#ifdef FEATURE_SVR_GC + _ASSERTE(g_heap_type != GC_HEAP_INVALID); + return (g_heap_type == GC_HEAP_SVR); +#else + return false; +#endif // FEATURE_SVR_GC + } + +#ifndef DACCESS_COMPILE + // Initializes a non-standalone GC. + static HRESULT InitializeDefaultGC(); + + // Records a change in eventing state. This ultimately will inform the GC that it needs to be aware + // of new events being enabled. + static void RecordEventStateChange(bool isPublicProvider, GCEventKeyword keywords, GCEventLevel level); +#endif // DACCESS_COMPILE + +private: + // This class should never be instantiated. + GCHeapUtilities() = delete; +}; + +#endif // _GCHEAPUTILITIES_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/gcrhenv.cpp b/src/coreclr/src/nativeaot/Runtime/gcrhenv.cpp new file mode 100644 index 0000000000000..830f2a1613b41 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcrhenv.cpp @@ -0,0 +1,1519 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This module provides data storage and implementations needed by gcrhenv.h to help provide an isolated build +// and runtime environment in which GC and HandleTable code can exist with minimal modifications from the CLR +// mainline. See gcrhenv.h for a more detailed explanation of how this all fits together. +// + +#include "common.h" + +#include "gcenv.h" +#include "gcheaputilities.h" +#include "gchandleutilities.h" +#include "profheapwalkhelper.h" + +#include "gcenv.ee.h" + +#include "RestrictedCallouts.h" + +#include "gcrhinterface.h" + +#include "slist.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" + +#include "thread.h" + +#include "shash.h" +#include "RWLock.h" +#include "TypeManager.h" +#include "RuntimeInstance.h" +#include "objecthandle.h" +#include "eetype.inl" +#include "RhConfig.h" + +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" + +#include "gcdesc.h" +#include "SyncClean.hpp" + +#include "daccess.h" + +#include "GCMemoryHelpers.h" + +#include "holder.h" +#include "volatile.h" + +#ifdef FEATURE_ETW + #ifndef _INC_WINDOWS + typedef void* LPVOID; + typedef uint32_t UINT; + typedef void* PVOID; + typedef uint64_t ULONGLONG; + typedef uint32_t ULONG; + typedef int64_t LONGLONG; + typedef uint8_t BYTE; + typedef uint16_t UINT16; + #endif // _INC_WINDOWS + + #include "etwevents.h" + #include "eventtrace.h" +#else // FEATURE_ETW + #include "etmdummy.h" + #define ETW_EVENT_ENABLED(e,f) false +#endif // FEATURE_ETW + +GPTR_IMPL(EEType, g_pFreeObjectEEType); + +#include "DebuggerHook.h" + +#include "gctoclreventsink.h" + +#ifndef DACCESS_COMPILE + +bool RhInitializeFinalization(); +bool RhStartFinalizerThread(); +void RhEnableFinalization(); + +// Simplified EEConfig -- It is just a static member, which statically initializes to the default values and +// has no dynamic initialization. Some settings may change at runtime, however. 
(Example: gcstress is +// enabled via a compiled-in call from a given managed module, not through snooping an environment setting.) +// +static EEConfig s_sDummyConfig; +EEConfig* g_pConfig = &s_sDummyConfig; + +// A few settings are now backed by the cut-down version of Redhawk configuration values. +static RhConfig g_sRhConfig; +RhConfig * g_pRhConfig = &g_sRhConfig; + +#ifdef FEATURE_ETW +// +// ----------------------------------------------------------------------------------------------------------- +// +// The automatically generated part of the Redhawk ETW infrastructure (EtwEvents.h) calls the following +// function whenever the system enables or disables tracing for this provider. +// + +UInt32 EtwCallback(UInt32 IsEnabled, RH_ETW_CONTEXT * pContext) +{ + GCHeapUtilities::RecordEventStateChange(!!(pContext->RegistrationHandle == Microsoft_Windows_Redhawk_GC_PublicHandle), + static_cast(pContext->MatchAnyKeyword), + static_cast(pContext->Level)); + + if (IsEnabled && + (pContext->RegistrationHandle == Microsoft_Windows_Redhawk_GC_PrivateHandle) && + GCHeapUtilities::IsGCHeapInitialized()) + { + FireEtwGCSettings(GCHeapUtilities::GetGCHeap()->GetValidSegmentSize(FALSE), + GCHeapUtilities::GetGCHeap()->GetValidSegmentSize(TRUE), + GCHeapUtilities::IsServerHeap()); + GCHeapUtilities::GetGCHeap()->DiagTraceGCSegments(); + } + + // Special check for the runtime provider's GCHeapCollectKeyword. Profilers + // flick this to force a full GC. + if (IsEnabled && + (pContext->RegistrationHandle == Microsoft_Windows_Redhawk_GC_PublicHandle) && + GCHeapUtilities::IsGCHeapInitialized() && + ((pContext->MatchAnyKeyword & CLR_GCHEAPCOLLECT_KEYWORD) != 0)) + { + // Profilers may (optionally) specify extra data in the filter parameter + // to log with the GCStart event. + LONGLONG l64ClientSequenceNumber = 0; + if ((pContext->FilterData != NULL) && + (pContext->FilterData->Type == 1) && + (pContext->FilterData->Size == sizeof(l64ClientSequenceNumber))) + { + l64ClientSequenceNumber = *(LONGLONG *) (pContext->FilterData->Ptr); + } + ETW::GCLog::ForceGC(l64ClientSequenceNumber); + } + + return 0; +} +#endif // FEATURE_ETW + +// +// ----------------------------------------------------------------------------------------------------------- +// +// The rest of Redhawk needs to be able to talk to the GC/HandleTable code (to initialize it, allocate +// objects etc.) without pulling in the entire adaptation layer provided by this file and gcrhenv.h. To this +// end the rest of Redhawk talks to us via a simple interface described in gcrhinterface.h. We provide the +// implementation behind those APIs here. +// + +// Perform any runtime-startup initialization needed by the GC, HandleTable or environmental code in gcrhenv. +// The boolean parameter should be true if a server GC is required and false for workstation. Returns true on +// success or false if a subsystem failed to initialize. + +#ifndef DACCESS_COMPILE +CrstStatic g_SuspendEELock; +#ifdef _MSC_VER +#pragma warning(disable:4815) // zero-sized array in stack object will have no elements +#endif // _MSC_VER +EEType g_FreeObjectEEType; + +// static +bool RedhawkGCInterface::InitializeSubsystems() +{ + g_pConfig->Construct(); + +#ifdef FEATURE_ETW + MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled = FALSE; + MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.IsEnabled = FALSE; + + // Register the Redhawk event provider with the system. 
+ RH_ETW_REGISTER_Microsoft_Windows_Redhawk_GC_Private(); + RH_ETW_REGISTER_Microsoft_Windows_Redhawk_GC_Public(); + + MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_Redhawk_GC_PrivateHandle; + MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context.RegistrationHandle = Microsoft_Windows_Redhawk_GC_PublicHandle; +#endif // FEATURE_ETW + + if (!InitializeSystemInfo()) + { + return false; + } + + // Initialize the special EEType used to mark free list entries in the GC heap. + g_FreeObjectEEType.InitializeAsGcFreeType(); + g_pFreeObjectEEType = &g_FreeObjectEEType; + + if (!g_SuspendEELock.InitNoThrow(CrstSuspendEE)) + return false; + +#ifdef FEATURE_SVR_GC + // TODO: This should use the logical CPU count adjusted for process affinity and cgroup limits + g_heap_type = (g_pRhConfig->GetUseServerGC() && PalGetProcessCpuCount() > 1) ? GC_HEAP_SVR : GC_HEAP_WKS; +#else + g_heap_type = GC_HEAP_WKS; +#endif + + HRESULT hr = GCHeapUtilities::InitializeDefaultGC(); + if (FAILED(hr)) + return false; + + // Apparently the Windows linker removes global variables if they are never + // read from, which is a problem for g_gcDacGlobals since it's expected that + // only the DAC will read from it. This forces the linker to include + // g_gcDacGlobals. + volatile void* _dummy = g_gcDacGlobals; + + // Initialize the GC subsystem. + hr = g_pGCHeap->Initialize(); + if (FAILED(hr)) + return false; + + if (!RhInitializeFinalization()) + return false; + + // Initialize HandleTable. + if (!GCHandleUtilities::GetGCHandleManager()->Initialize()) + return false; + + return true; +} +#endif // !DACCESS_COMPILE + +// Allocate an object on the GC heap. +// pEEType - type of the object +// uFlags - GC type flags (see gc.h GC_ALLOC_*) +// cbSize - size in bytes of the final object +// pTransitionFrame- transition frame to make stack crawable +// Returns a pointer to the object allocated or NULL on failure. + +COOP_PINVOKE_HELPER(void*, RhpGcAlloc, (EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame)) +{ + Thread * pThread = ThreadStore::GetCurrentThread(); + + pThread->SetCurrentThreadPInvokeTunnelForGcAlloc(pTransitionFrame); + + ASSERT(!pThread->IsDoNotTriggerGcSet()); + + size_t max_object_size; +#ifdef HOST_64BIT + if (g_pConfig->GetGCAllowVeryLargeObjects()) + { + max_object_size = (INT64_MAX - 7 - min_obj_size); + } + else +#endif // HOST_64BIT + { + max_object_size = (INT32_MAX - 7 - min_obj_size); + } + + if (cbSize >= max_object_size) + return NULL; + + const int MaxArrayLength = 0x7FEFFFFF; + const int MaxByteArrayLength = 0x7FFFFFC7; + + // Impose limits on maximum array length in each dimension to allow efficient + // implementation of advanced range check elimination in future. We have to allow + // higher limit for array of bytes (or one byte structs) for backward compatibility. + // Keep in sync with Array.MaxArrayLength in BCL. 
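+    // Note on the check below: it is only a fast filter. If cbSize is within MaxByteArrayLength,
+    // no per-dimension limit can be exceeded either: the element count of a byte array is below
+    // its allocation size, and for component sizes of two or more the static_assert inside the
+    // block keeps cbSize / 2 (an upper bound on the element count) under MaxArrayLength.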
+ if (cbSize > MaxByteArrayLength /* note: comparing allocation size with element count */) + { + // Ensure the above if check covers the minimal interesting size + static_assert(MaxByteArrayLength < (uint64_t)MaxArrayLength * 2, ""); + + if (pEEType->IsArray()) + { + if (pEEType->get_ComponentSize() != 1) + { + size_t elementCount = (cbSize - pEEType->get_BaseSize()) / pEEType->get_ComponentSize(); + if (elementCount > MaxArrayLength) + return NULL; + } + else + { + size_t elementCount = cbSize - pEEType->get_BaseSize(); + if (elementCount > MaxByteArrayLength) + return NULL; + } + } + } + + if (cbSize > RH_LARGE_OBJECT_SIZE) + uFlags |= GC_ALLOC_LARGE_OBJECT_HEAP; + + // Save the EEType for instrumentation purposes. + RedhawkGCInterface::SetLastAllocEEType(pEEType); + + Object * pObject = GCHeapUtilities::GetGCHeap()->Alloc(pThread->GetAllocContext(), cbSize, uFlags); + + // NOTE: we cannot call PublishObject here because the object isn't initialized! + + return pObject; +} + +// returns the object pointer for caller's convenience +COOP_PINVOKE_HELPER(void*, RhpPublishObject, (void* pObject, UIntNative cbSize)) +{ + UNREFERENCED_PARAMETER(cbSize); + ASSERT(cbSize >= LARGE_OBJECT_SIZE); + GCHeapUtilities::GetGCHeap()->PublishObject((uint8_t*)pObject); + return pObject; +} + +// static +void RedhawkGCInterface::InitAllocContext(gc_alloc_context * pAllocContext) +{ + // NOTE: This method is currently unused because the thread's alloc_context is initialized via + // static initialization of tls_CurrentThread. If the initial contents of the alloc_context + // ever change, then a matching change will need to be made to the tls_CurrentThread static + // initializer. + + pAllocContext->init(); +} + +// static +void RedhawkGCInterface::ReleaseAllocContext(gc_alloc_context * pAllocContext) +{ + s_DeadThreadsNonAllocBytes += pAllocContext->alloc_limit - pAllocContext->alloc_ptr; + GCHeapUtilities::GetGCHeap()->FixAllocContext(pAllocContext, NULL, NULL); +} + +// static +void RedhawkGCInterface::WaitForGCCompletion() +{ + GCHeapUtilities::GetGCHeap()->WaitUntilGCComplete(); +} + +//------------------------------------------------------------------------------------------------- +// Used only by GC initialization, this initializes the EEType used to mark free entries in the GC heap. It +// should be an array type with a component size of one (so the GC can easily size it as appropriate) and +// should be marked as not containing any references. The rest of the fields don't matter: the GC does not +// query them and the rest of the runtime will never hold a reference to free object. 
+ +void EEType::InitializeAsGcFreeType() +{ + m_usComponentSize = 1; + m_usFlags = ParameterizedEEType; + m_uBaseSize = sizeof(Array) + SYNC_BLOCK_SKEW; +} + +#endif // !DACCESS_COMPILE + +extern void GcEnumObject(PTR_OBJECTREF pObj, UInt32 flags, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc); +extern void GcEnumObjectsConservatively(PTR_OBJECTREF pLowerBound, PTR_OBJECTREF pUpperBound, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc); +extern void GcBulkEnumObjects(PTR_OBJECTREF pObjs, DWORD cObjs, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc); + +struct EnumGcRefContext : GCEnumContext +{ + EnumGcRefCallbackFunc * f; + EnumGcRefScanContext * sc; +}; + +bool IsOnReadablePortionOfThread(EnumGcRefScanContext * pSc, PTR_VOID pointer) +{ + if (!pSc->thread_under_crawl->IsWithinStackBounds(pointer)) + { + return false; + } + + // If the stack_limit is 0, then it wasn't set properly, and the check below will not + // operate correctly. + ASSERT(pSc->stack_limit != 0); + + // This ensures that the pointer is not in a currently-unused portion of the stack + // because the above check is only verifying against the entire stack bounds, + // but stack_limit is describing the current bound of the stack + if (PTR_TO_TADDR(pointer) < pSc->stack_limit) + { + return false; + } + return true; +} + +#ifdef HOST_64BIT +#define CONSERVATIVE_REGION_MAGIC_NUMBER 0x87DF7A104F09E0A9ULL +#else +#define CONSERVATIVE_REGION_MAGIC_NUMBER 0x4F09E0A9 +#endif + +// This is a structure that is created by executing runtime code in order to report a conservative +// region. In managed code if there is a pinned byref pointer to one of this (with the appropriate +// magic number set in it, and a hash that matches up) then the region from regionPointerLow to +// regionPointerHigh will be reported conservatively. This can only be used to report memory regions +// on the current stack and the structure must itself be located on the stack. +struct ConservativelyReportedRegionDesc +{ + // If this is really a ConservativelyReportedRegionDesc then the magic value will be + // CONSERVATIVE_REGION_MAGIC_NUMBER, and the hash will be the result of CalculateHash + // across magic, regionPointerLow, and regionPointerHigh + uintptr_t magic; + PTR_VOID regionPointerLow; + PTR_VOID regionPointerHigh; + uintptr_t hash; + + static uintptr_t CalculateHash(uintptr_t h1, uintptr_t h2, uintptr_t h3) + { + uintptr_t hash = h1; + hash = ((hash << 13) ^ hash) ^ h2; + hash = ((hash << 13) ^ hash) ^ h3; + return hash; + } +}; + +typedef DPTR(ConservativelyReportedRegionDesc) PTR_ConservativelyReportedRegionDesc; + +bool IsPtrAligned(TADDR value) +{ + return (value & (POINTER_SIZE - 1)) == 0; +} + +// Logic to actually conservatively report a ConservativelyReportedRegionDesc +// This logic is to be used when attempting to promote a pinned, interior pointer. +// It will attempt to heuristically identify ConservativelyReportedRegionDesc structures +// and if they exist, it will conservatively report a memory region. 
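+//
+// Illustrative shape of what is being matched (everything lives on the reporting thread's stack;
+// the angle-bracket values are placeholders):
+//
+//   ConservativelyReportedRegionDesc desc;
+//   desc.magic             = CONSERVATIVE_REGION_MAGIC_NUMBER;
+//   desc.regionPointerLow  = <start of the buffer to report>;
+//   desc.regionPointerHigh = <end of the buffer to report>;
+//   desc.hash              = ConservativelyReportedRegionDesc::CalculateHash(desc.magic,
+//                                (uintptr_t)desc.regionPointerLow, (uintptr_t)desc.regionPointerHigh);
+//
+// with a pinned interior (byref) pointer in some managed frame pointing at 'desc'. When all of the
+// checks below pass, [regionPointerLow, regionPointerHigh) is reported conservatively to the GC.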
+static void ReportExplicitConservativeReportedRegionIfValid(EnumGcRefContext * pCtx, PTR_PTR_VOID pObject) +{ + // If the stack_limit isn't set (which can only happen for frames which make a p/invoke call + // there cannot be a ConservativelyReportedRegionDesc + if (pCtx->sc->stack_limit == 0) + return; + + PTR_ConservativelyReportedRegionDesc conservativeRegionDesc = (PTR_ConservativelyReportedRegionDesc)(*pObject); + + // Ensure that conservativeRegionDesc pointer points at a readable memory region + if (!IsPtrAligned(PTR_TO_TADDR(conservativeRegionDesc))) + { + return; + } + + if (!IsOnReadablePortionOfThread(pCtx->sc, conservativeRegionDesc)) + { + return; + } + if (!IsOnReadablePortionOfThread(pCtx->sc, conservativeRegionDesc + 1)) + { + return; + } + + // Now, check to see if what we're pointing at is actually a ConservativeRegionDesc + // First: check the magic number. If that doesn't match, it cannot be one + if (conservativeRegionDesc->magic != CONSERVATIVE_REGION_MAGIC_NUMBER) + { + return; + } + + // Second: check to see that the region pointers point at memory which is aligned + // such that the pointers could be pointers to object references + if (!IsPtrAligned(PTR_TO_TADDR(conservativeRegionDesc->regionPointerLow))) + { + return; + } + if (!IsPtrAligned(PTR_TO_TADDR(conservativeRegionDesc->regionPointerHigh))) + { + return; + } + + // Third: check that start is before end. + if (conservativeRegionDesc->regionPointerLow >= conservativeRegionDesc->regionPointerHigh) + { + return; + } + +#ifndef DACCESS_COMPILE + // This fails for cross-bitness dac compiles and isn't really needed in the DAC anyways. + + // Fourth: Compute a hash of the above numbers. Check to see that the hash matches the hash + // value stored + if (ConservativelyReportedRegionDesc::CalculateHash(CONSERVATIVE_REGION_MAGIC_NUMBER, + (uintptr_t)PTR_TO_TADDR(conservativeRegionDesc->regionPointerLow), + (uintptr_t)PTR_TO_TADDR(conservativeRegionDesc->regionPointerHigh)) + != conservativeRegionDesc->hash) + { + return; + } +#endif // DACCESS_COMPILE + + // Fifth: Check to see that the region pointed at is within the bounds of the thread + if (!IsOnReadablePortionOfThread(pCtx->sc, conservativeRegionDesc->regionPointerLow)) + { + return; + } + if (!IsOnReadablePortionOfThread(pCtx->sc, ((PTR_OBJECTREF)conservativeRegionDesc->regionPointerHigh) - 1)) + { + return; + } + + // At this point we're most likely working with a ConservativeRegionDesc. We'll assume + // that's true, and perform conservative reporting. (We've done enough checks to ensure that + // this conservative reporting won't itself cause an AV, even if our heuristics are wrong + // with the second and fifth set of checks) + GcEnumObjectsConservatively((PTR_OBJECTREF)conservativeRegionDesc->regionPointerLow, (PTR_OBJECTREF)conservativeRegionDesc->regionPointerHigh, pCtx->f, pCtx->sc); +} + +static void EnumGcRefsCallback(void * hCallback, PTR_PTR_VOID pObject, UInt32 flags) +{ + EnumGcRefContext * pCtx = (EnumGcRefContext *)hCallback; + + GcEnumObject((PTR_OBJECTREF)pObject, flags, pCtx->f, pCtx->sc); + + const UInt32 interiorPinned = GC_CALL_INTERIOR | GC_CALL_PINNED; + // If this is an interior pinned pointer, check to see if we're working with a ConservativeRegionDesc + // and if so, report a conservative region. NOTE: do this only during promotion as conservative + // reporting has no value during other GC phases. 
+ if (((flags & interiorPinned) == interiorPinned) && (pCtx->sc->promotion)) + { + ReportExplicitConservativeReportedRegionIfValid(pCtx, pObject); + } +} + +// static +void RedhawkGCInterface::EnumGcRefs(ICodeManager * pCodeManager, + MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + void * pfnEnumCallback, + void * pvCallbackData) +{ + EnumGcRefContext ctx; + ctx.pCallback = EnumGcRefsCallback; + ctx.f = (EnumGcRefCallbackFunc *)pfnEnumCallback; + ctx.sc = (EnumGcRefScanContext *)pvCallbackData; + ctx.sc->stack_limit = pRegisterSet->GetSP(); + + pCodeManager->EnumGcRefs(pMethodInfo, + safePointAddress, + pRegisterSet, + &ctx); +} + +// static +void RedhawkGCInterface::EnumGcRefsInRegionConservatively(PTR_RtuObjectRef pLowerBound, + PTR_RtuObjectRef pUpperBound, + void * pfnEnumCallback, + void * pvCallbackData) +{ + GcEnumObjectsConservatively((PTR_OBJECTREF)pLowerBound, (PTR_OBJECTREF)pUpperBound, (EnumGcRefCallbackFunc *)pfnEnumCallback, (EnumGcRefScanContext *)pvCallbackData); +} + +// static +void RedhawkGCInterface::EnumGcRef(PTR_RtuObjectRef pRef, GCRefKind kind, void * pfnEnumCallback, void * pvCallbackData) +{ + ASSERT((GCRK_Object == kind) || (GCRK_Byref == kind)); + + DWORD flags = 0; + + if (kind == GCRK_Byref) + { + flags |= GC_CALL_INTERIOR; + } + + GcEnumObject((PTR_OBJECTREF)pRef, flags, (EnumGcRefCallbackFunc *)pfnEnumCallback, (EnumGcRefScanContext *)pvCallbackData); +} + +#ifndef DACCESS_COMPILE + +// static +void RedhawkGCInterface::BulkEnumGcObjRef(PTR_RtuObjectRef pRefs, UInt32 cRefs, void * pfnEnumCallback, void * pvCallbackData) +{ + GcBulkEnumObjects((PTR_OBJECTREF)pRefs, cRefs, (EnumGcRefCallbackFunc *)pfnEnumCallback, (EnumGcRefScanContext *)pvCallbackData); +} + +// static +GcSegmentHandle RedhawkGCInterface::RegisterFrozenSegment(void * pSection, size_t SizeSection) +{ +#ifdef FEATURE_BASICFREEZE + segment_info seginfo; + + seginfo.pvMem = pSection; + seginfo.ibFirstObject = sizeof(ObjHeader); + seginfo.ibAllocated = SizeSection; + seginfo.ibCommit = seginfo.ibAllocated; + seginfo.ibReserved = seginfo.ibAllocated; + + return (GcSegmentHandle)GCHeapUtilities::GetGCHeap()->RegisterFrozenSegment(&seginfo); +#else // FEATURE_BASICFREEZE + return NULL; +#endif // FEATURE_BASICFREEZE +} + +// static +void RedhawkGCInterface::UnregisterFrozenSegment(GcSegmentHandle segment) +{ + GCHeapUtilities::GetGCHeap()->UnregisterFrozenSegment((segment_handle)segment); +} + +EXTERN_C UInt32_BOOL g_fGcStressStarted = UInt32_FALSE; // UInt32_BOOL because asm code reads it +#ifdef FEATURE_GC_STRESS +// static +void RedhawkGCInterface::StressGc() +{ + // The GarbageCollect operation below may trash the last win32 error. We save the error here so that it can be + // restored after the GC operation; + Int32 lastErrorOnEntry = PalGetLastError(); + + if (g_fGcStressStarted && !ThreadStore::GetCurrentThread()->IsSuppressGcStressSet() && !ThreadStore::GetCurrentThread()->IsDoNotTriggerGcSet()) + { + GCHeapUtilities::GetGCHeap()->GarbageCollect(); + } + + // Restore the saved error + PalSetLastError(lastErrorOnEntry); +} +#endif // FEATURE_GC_STRESS + + +#ifdef FEATURE_GC_STRESS +COOP_PINVOKE_HELPER(void, RhpInitializeGcStress, ()) +{ + g_fGcStressStarted = UInt32_TRUE; + g_pConfig->SetGCStressLevel(EEConfig::GCSTRESS_INSTR_NGEN); // this is the closest CLR equivalent to what we do. +} +#endif // FEATURE_GC_STRESS + +#endif // !DACCESS_COMPILE + +// +// Support for scanning the GC heap, objects and roots. 
+// + +// Enumerate every reference field in an object, calling back to the specified function with the given context +// for each such reference found. +// static +void RedhawkGCInterface::ScanObject(void *pObject, GcScanObjectFunction pfnScanCallback, void *pContext) +{ +#if !defined(DACCESS_COMPILE) && defined(FEATURE_EVENT_TRACE) + GCHeapUtilities::GetGCHeap()->DiagWalkObject((Object*)pObject, (walk_fn)pfnScanCallback, pContext); +#else + UNREFERENCED_PARAMETER(pObject); + UNREFERENCED_PARAMETER(pfnScanCallback); + UNREFERENCED_PARAMETER(pContext); +#endif // DACCESS_COMPILE +} + +// When scanning for object roots we use existing GC APIs used for object promotion and moving. We use an +// adapter callback to transform the promote function signature used for these methods into something simpler +// that avoids exposing unnecessary implementation details. The pointer to a ScanContext normally passed to +// promotion functions is actually a pointer to the structure below which serves to recall the actual function +// pointer and context for the real context. +struct ScanRootsContext +{ + GcScanRootFunction m_pfnCallback; + void * m_pContext; +}; + +// Callback with a EnumGcRefCallbackFunc signature that forwards the call to a callback with a GcScanFunction signature +// and its own context. +void ScanRootsCallbackWrapper(Object** pObject, EnumGcRefScanContext* pContext, DWORD dwFlags) +{ + UNREFERENCED_PARAMETER(dwFlags); + + ScanRootsContext * pRealContext = (ScanRootsContext*)pContext; + + (*pRealContext->m_pfnCallback)((void**)&pObject, pRealContext->m_pContext); +} + +// Enumerate all the object roots located on the specified thread's stack. It is only safe to call this from +// the context of a GC. +// +// static +void RedhawkGCInterface::ScanStackRoots(Thread *pThread, GcScanRootFunction pfnScanCallback, void *pContext) +{ +#ifndef DACCESS_COMPILE + ScanRootsContext sContext; + sContext.m_pfnCallback = pfnScanCallback; + sContext.m_pContext = pContext; + + pThread->GcScanRoots(reinterpret_cast(ScanRootsCallbackWrapper), &sContext); +#else + UNREFERENCED_PARAMETER(pThread); + UNREFERENCED_PARAMETER(pfnScanCallback); + UNREFERENCED_PARAMETER(pContext); +#endif // !DACCESS_COMPILE +} + +// Enumerate all the object roots located in statics. It is only safe to call this from the context of a GC. +// +// static +void RedhawkGCInterface::ScanStaticRoots(GcScanRootFunction pfnScanCallback, void *pContext) +{ +#ifndef DACCESS_COMPILE + ScanRootsContext sContext; + sContext.m_pfnCallback = pfnScanCallback; + sContext.m_pContext = pContext; + + GetRuntimeInstance()->EnumAllStaticGCRefs(reinterpret_cast(ScanRootsCallbackWrapper), &sContext); +#else + UNREFERENCED_PARAMETER(pfnScanCallback); + UNREFERENCED_PARAMETER(pContext); +#endif // !DACCESS_COMPILE +} + +// Enumerate all the object roots located in handle tables. It is only safe to call this from the context of a +// GC. 
+// +// static +void RedhawkGCInterface::ScanHandleTableRoots(GcScanRootFunction pfnScanCallback, void *pContext) +{ +#if !defined(DACCESS_COMPILE) && defined(FEATURE_EVENT_TRACE) + ScanRootsContext sContext; + sContext.m_pfnCallback = pfnScanCallback; + sContext.m_pContext = pContext; + Ref_ScanPointers(2, 2, (EnumGcRefScanContext*)&sContext, ScanRootsCallbackWrapper); +#else + UNREFERENCED_PARAMETER(pfnScanCallback); + UNREFERENCED_PARAMETER(pContext); +#endif // !DACCESS_COMPILE +} + +#ifndef DACCESS_COMPILE + +UInt32 RedhawkGCInterface::GetGCDescSize(void * pType) +{ + MethodTable * pMT = (MethodTable *)pType; + + if (!pMT->ContainsPointersOrCollectible()) + return 0; + + return (UInt32)CGCDesc::GetCGCDescFromMT(pMT)->GetSize(); +} + +COOP_PINVOKE_HELPER(void, RhpCopyObjectContents, (Object* pobjDest, Object* pobjSrc)) +{ + size_t cbDest = pobjDest->GetSize() - sizeof(ObjHeader); + size_t cbSrc = pobjSrc->GetSize() - sizeof(ObjHeader); + if (cbSrc != cbDest) + return; + + ASSERT(pobjDest->get_EEType()->HasReferenceFields() == pobjSrc->get_EEType()->HasReferenceFields()); + + if (pobjDest->get_EEType()->HasReferenceFields()) + { + GCSafeCopyMemoryWithWriteBarrier(pobjDest, pobjSrc, cbDest); + } + else + { + memcpy(pobjDest, pobjSrc, cbDest); + } +} + +COOP_PINVOKE_HELPER(Boolean, RhCompareObjectContentsAndPadding, (Object* pObj1, Object* pObj2)) +{ + ASSERT(pObj1->get_EEType()->IsEquivalentTo(pObj2->get_EEType())); + EEType * pEEType = pObj1->get_EEType(); + size_t cbFields = pEEType->get_BaseSize() - (sizeof(ObjHeader) + sizeof(EEType*)); + + UInt8 * pbFields1 = (UInt8*)pObj1 + sizeof(EEType*); + UInt8 * pbFields2 = (UInt8*)pObj2 + sizeof(EEType*); + + return (memcmp(pbFields1, pbFields2, cbFields) == 0) ? Boolean_true : Boolean_false; +} + +// Thread static representing the last allocation. +// This is used to log the type information for each slow allocation. +DECLSPEC_THREAD +EEType * RedhawkGCInterface::tls_pLastAllocationEEType = NULL; + +// Get the last allocation for this thread. +EEType * RedhawkGCInterface::GetLastAllocEEType() +{ + return tls_pLastAllocationEEType; +} + +// Set the last allocation for this thread. +void RedhawkGCInterface::SetLastAllocEEType(EEType * pEEType) +{ + tls_pLastAllocationEEType = pEEType; +} + +uint64_t RedhawkGCInterface::s_DeadThreadsNonAllocBytes = 0; + +uint64_t RedhawkGCInterface::GetDeadThreadsNonAllocBytes() +{ +#ifdef HOST_64BIT + return s_DeadThreadsNonAllocBytes; +#else + // As it could be noticed we read 64bit values that may be concurrently updated. + // Such reads are not guaranteed to be atomic on 32bit so extra care should be taken. + return PalInterlockedCompareExchange64((Int64*)&s_DeadThreadsNonAllocBytes, 0, 0); +#endif +} + +void RedhawkGCInterface::DestroyTypedHandle(void * handle) +{ + GCHandleUtilities::GetGCHandleManager()->DestroyHandleOfUnknownType((OBJECTHANDLE)handle); +} + +void* RedhawkGCInterface::CreateTypedHandle(void* pObject, int type) +{ + return (void*)GCHandleUtilities::GetGCHandleManager()->GetGlobalHandleStore()->CreateHandleOfType((Object*)pObject, (HandleType)type); +} + +void GCToEEInterface::SuspendEE(SUSPEND_REASON reason) +{ +#ifdef FEATURE_EVENT_TRACE + ETW::GCLog::ETW_GC_INFO Info; + Info.SuspendEE.Reason = reason; + Info.SuspendEE.GcCount = (((reason == SUSPEND_FOR_GC) || (reason == SUSPEND_FOR_GC_PREP)) ? 
+ (UInt32)GCHeapUtilities::GetGCHeap()->GetGcCount() : (UInt32)-1); +#endif // FEATURE_EVENT_TRACE + + FireEtwGCSuspendEEBegin_V1(Info.SuspendEE.Reason, Info.SuspendEE.GcCount, GetClrInstanceId()); + + g_SuspendEELock.Enter(); + + GCHeapUtilities::GetGCHeap()->SetGCInProgress(TRUE); + + GetThreadStore()->SuspendAllThreads(true); + + FireEtwGCSuspendEEEnd_V1(GetClrInstanceId()); + +#ifdef APP_LOCAL_RUNTIME + // now is a good opportunity to retry starting the finalizer thread + RhStartFinalizerThread(); +#endif +} + +void GCToEEInterface::RestartEE(bool /*bFinishedGC*/) +{ + FireEtwGCRestartEEBegin_V1(GetClrInstanceId()); + + SyncClean::CleanUp(); + + GetThreadStore()->ResumeAllThreads(true); + GCHeapUtilities::GetGCHeap()->SetGCInProgress(FALSE); + + g_SuspendEELock.Leave(); + + FireEtwGCRestartEEEnd_V1(GetClrInstanceId()); +} + +void GCToEEInterface::GcStartWork(int condemned, int /*max_gen*/) +{ + DebuggerHook::OnBeforeGcCollection(); + + // Invoke any registered callouts for the start of the collection. + RestrictedCallouts::InvokeGcCallouts(GCRC_StartCollection, condemned); +} + +// EE can perform post stack scanning action, while the user threads are still suspended +void GCToEEInterface::AfterGcScanRoots(int condemned, int /*max_gen*/, ScanContext* /*sc*/) +{ + // Invoke any registered callouts for the end of the mark phase. + RestrictedCallouts::InvokeGcCallouts(GCRC_AfterMarkPhase, condemned); +} + +void GCToEEInterface::GcBeforeBGCSweepWork() +{ +} + +void GCToEEInterface::GcDone(int condemned) +{ + // Invoke any registered callouts for the end of the collection. + RestrictedCallouts::InvokeGcCallouts(GCRC_EndCollection, condemned); +} + +bool GCToEEInterface::RefCountedHandleCallbacks(Object * pObject) +{ + return RestrictedCallouts::InvokeRefCountedHandleCallbacks(pObject); +} + +void GCToEEInterface::SyncBlockCacheWeakPtrScan(HANDLESCANPROC /*scanProc*/, uintptr_t /*lp1*/, uintptr_t /*lp2*/) +{ +} + +void GCToEEInterface::SyncBlockCacheDemote(int /*max_gen*/) +{ +} + +void GCToEEInterface::SyncBlockCachePromotionsGranted(int /*max_gen*/) +{ +} + +uint32_t GCToEEInterface::GetActiveSyncBlockCount() +{ + return 0; +} + +gc_alloc_context * GCToEEInterface::GetAllocContext() +{ + return ThreadStore::GetCurrentThread()->GetAllocContext(); +} +#endif // !DACCESS_COMPILE + +uint8_t* GCToEEInterface::GetLoaderAllocatorObjectForGC(Object* pObject) +{ + return nullptr; +} + +bool GCToEEInterface::IsPreemptiveGCDisabled() +{ + return ThreadStore::GetCurrentThread()->IsCurrentThreadInCooperativeMode(); +} + +bool GCToEEInterface::EnablePreemptiveGC() +{ +#ifndef DACCESS_COMPILE + Thread* pThread = ThreadStore::GetCurrentThread(); + + if (pThread->IsCurrentThreadInCooperativeMode()) + { + pThread->EnablePreemptiveMode(); + return true; + } +#else + UNREFERENCED_PARAMETER(pThread); +#endif + return false; +} + +void GCToEEInterface::DisablePreemptiveGC() +{ +#ifndef DACCESS_COMPILE + ThreadStore::GetCurrentThread()->DisablePreemptiveMode(); +#else + UNREFERENCED_PARAMETER(pThread); +#endif +} + +Thread* GCToEEInterface::GetThread() +{ +#ifndef DACCESS_COMPILE + return ThreadStore::GetCurrentThread(); +#else + return NULL; +#endif +} + +#ifndef DACCESS_COMPILE + +#ifdef FEATURE_EVENT_TRACE +void ProfScanRootsHelper(Object** ppObject, ScanContext* pSC, uint32_t dwFlags) +{ + Object* pObj = *ppObject; + if (dwFlags& GC_CALL_INTERIOR) + { + pObj = GCHeapUtilities::GetGCHeap()->GetContainingObject(pObj, true); + if (pObj == nullptr) + return; + } + ScanRootsHelper(pObj, ppObject, pSC, dwFlags); +} 
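
Editorial note: ScanRootsCallbackWrapper earlier in this file and ProfScanRootsHelper just above are both thin adapters, re-exposing the GC's promotion-style enumeration callback (which carries a scan context and flags) as a much simpler "root plus user context" callback, with the real callback and its state smuggled through the context pointer. The following is a minimal, self-contained sketch of that pattern using hypothetical names rather than the runtime's real types; it is an illustration, not the runtime's code.

// Editor's sketch (hypothetical types): adapt a flag-carrying enumeration
// callback to a simpler "root + context" callback, as ScanRootsCallbackWrapper does.
#include <cstdint>

typedef void (*SimpleRootCallback)(void** pRoot, void* pContext);            // what callers see
typedef void (*RawEnumCallback)(void** pRoot, void* pScanCtx, uint32_t fl);  // what the enumerator wants

struct AdapterContext
{
    SimpleRootCallback m_pfnCallback;   // user's callback
    void*              m_pContext;      // user's context
};

// Matches the RawEnumCallback shape; recovers the real callback from the context.
static void AdapterCallback(void** pRoot, void* pScanCtx, uint32_t /*flags*/)
{
    AdapterContext* pReal = (AdapterContext*)pScanCtx;
    (*pReal->m_pfnCallback)(pRoot, pReal->m_pContext);
}

// Stand-in for the GC-side enumerator (e.g. Thread::GcScanRoots in the real code).
static void EnumerateRoots(RawEnumCallback pfn, void* pScanCtx)
{
    static void* someRoot = nullptr;
    pfn(&someRoot, pScanCtx, 0);
}

void ScanRootsSketch(SimpleRootCallback pfnUser, void* pUserContext)
{
    AdapterContext ctx = { pfnUser, pUserContext };
    EnumerateRoots(AdapterCallback, &ctx);   // adapter forwards each root to pfnUser
}

Callers then only need to supply the simple callback and never see the enumerator's flags or scan-context details, which is exactly what ScanStackRoots/ScanStaticRoots/ScanHandleTableRoots buy their consumers.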
+ +void GcScanRootsForETW(promote_func* fn, int condemned, int max_gen, ScanContext* sc) +{ + UNREFERENCED_PARAMETER(condemned); + UNREFERENCED_PARAMETER(max_gen); + + FOREACH_THREAD(pThread) + { + if (pThread->IsGCSpecial()) + continue; + + if (GCHeapUtilities::GetGCHeap()->IsThreadUsingAllocationContextHeap(pThread->GetAllocContext(), sc->thread_number)) + continue; + + sc->thread_under_crawl = pThread; + sc->dwEtwRootKind = kEtwGCRootKindStack; + pThread->GcScanRoots(reinterpret_cast(fn), sc); + sc->dwEtwRootKind = kEtwGCRootKindOther; + } + END_FOREACH_THREAD +} + +void ScanHandleForETW(Object** pRef, Object* pSec, uint32_t flags, ScanContext* context, bool isDependent) +{ + ProfilingScanContext* pSC = (ProfilingScanContext*)context; + + // Notify ETW of the handle + if (ETW::GCLog::ShouldWalkHeapRootsForEtw()) + { + ETW::GCLog::RootReference( + pRef, + *pRef, // object being rooted + pSec, // pSecondaryNodeForDependentHandle + isDependent, + pSC, + 0, // dwGCFlags, + flags); // ETW handle flags + } +} + +// This is called only if we've determined that either: +// a) The Profiling API wants to do a walk of the heap, and it has pinned the +// profiler in place (so it cannot be detached), and it's thus safe to call into the +// profiler, OR +// b) ETW infrastructure wants to do a walk of the heap either to log roots, +// objects, or both. +// This can also be called to do a single walk for BOTH a) and b) simultaneously. Since +// ETW can ask for roots, but not objects +void GCProfileWalkHeapWorker(BOOL fShouldWalkHeapRootsForEtw, BOOL fShouldWalkHeapObjectsForEtw) +{ + ProfilingScanContext SC(FALSE); + unsigned max_generation = GCHeapUtilities::GetGCHeap()->GetMaxGeneration(); + + // **** Scan roots: Only scan roots if profiling API wants them or ETW wants them. + if (fShouldWalkHeapRootsForEtw) + { + GcScanRootsForETW(&ProfScanRootsHelper, max_generation, max_generation, &SC); + SC.dwEtwRootKind = kEtwGCRootKindFinalizer; + GCHeapUtilities::GetGCHeap()->DiagScanFinalizeQueue(&ProfScanRootsHelper, &SC); + + // Handles are kept independent of wks/svr/concurrent builds + SC.dwEtwRootKind = kEtwGCRootKindHandle; + GCHeapUtilities::GetGCHeap()->DiagScanHandles(&ScanHandleForETW, max_generation, &SC); + } + + // **** Scan dependent handles: only if ETW wants roots + if (fShouldWalkHeapRootsForEtw) + { + // GcScanDependentHandlesForProfiler double-checks + // CORProfilerTrackConditionalWeakTableElements() before calling into the profiler + + ProfilingScanContext* pSC = &SC; + + // we'll re-use pHeapId (which was either unused (0) or freed by EndRootReferences2 + // (-1)), so reset it to NULL + _ASSERTE((*((size_t *)(&pSC->pHeapId)) == (size_t)(-1)) || + (*((size_t *)(&pSC->pHeapId)) == (size_t)(0))); + pSC->pHeapId = NULL; + + GCHeapUtilities::GetGCHeap()->DiagScanDependentHandles(&ScanHandleForETW, max_generation, &SC); + } + + ProfilerWalkHeapContext profilerWalkHeapContext(FALSE, SC.pvEtwContext); + + // **** Walk objects on heap: only if ETW wants them. + if (fShouldWalkHeapObjectsForEtw) + { + GCHeapUtilities::GetGCHeap()->DiagWalkHeap(&HeapWalkHelper, &profilerWalkHeapContext, max_generation, true /* walk the large object heap */); + } + + #ifdef FEATURE_EVENT_TRACE + // **** Done! 
Indicate to ETW helpers that the heap walk is done, so any buffers + // should be flushed into the ETW stream + if (fShouldWalkHeapObjectsForEtw || fShouldWalkHeapRootsForEtw) + { + ETW::GCLog::EndHeapDump(&profilerWalkHeapContext); + } +#endif // FEATURE_EVENT_TRACE +} +#endif // defined(FEATURE_EVENT_TRACE) + +void GCProfileWalkHeap() +{ + +#ifdef FEATURE_EVENT_TRACE + if (ETW::GCLog::ShouldWalkStaticsAndCOMForEtw()) + ETW::GCLog::WalkStaticsAndCOMForETW(); + + BOOL fShouldWalkHeapRootsForEtw = ETW::GCLog::ShouldWalkHeapRootsForEtw(); + BOOL fShouldWalkHeapObjectsForEtw = ETW::GCLog::ShouldWalkHeapObjectsForEtw(); +#else // !FEATURE_EVENT_TRACE + BOOL fShouldWalkHeapRootsForEtw = FALSE; + BOOL fShouldWalkHeapObjectsForEtw = FALSE; +#endif // FEATURE_EVENT_TRACE + +#ifdef FEATURE_EVENT_TRACE + // we need to walk the heap if one of GC_PROFILING or FEATURE_EVENT_TRACE + // is defined, since both of them make use of the walk heap worker. + if (fShouldWalkHeapRootsForEtw || fShouldWalkHeapObjectsForEtw) + { + GCProfileWalkHeapWorker(fShouldWalkHeapRootsForEtw, fShouldWalkHeapObjectsForEtw); + } +#endif // defined(FEATURE_EVENT_TRACE) +} + + +void GCToEEInterface::DiagGCStart(int gen, bool isInduced) +{ + UNREFERENCED_PARAMETER(gen); + UNREFERENCED_PARAMETER(isInduced); +} + +void GCToEEInterface::DiagUpdateGenerationBounds() +{ +} + +void GCToEEInterface::DiagWalkFReachableObjects(void* gcContext) +{ + UNREFERENCED_PARAMETER(gcContext); +} + +void GCToEEInterface::DiagGCEnd(size_t index, int gen, int reason, bool fConcurrent) +{ + UNREFERENCED_PARAMETER(index); + UNREFERENCED_PARAMETER(gen); + UNREFERENCED_PARAMETER(reason); + + if (!fConcurrent) + { + GCProfileWalkHeap(); + } +} + +// Note on last parameter: when calling this for bgc, only ETW +// should be sending these events so that existing profapi profilers +// don't get confused. +void WalkMovedReferences(uint8_t* begin, uint8_t* end, + ptrdiff_t reloc, + void* context, + bool fCompacting, + bool fBGC) +{ + UNREFERENCED_PARAMETER(begin); + UNREFERENCED_PARAMETER(end); + UNREFERENCED_PARAMETER(reloc); + UNREFERENCED_PARAMETER(context); + UNREFERENCED_PARAMETER(fCompacting); + UNREFERENCED_PARAMETER(fBGC); +} + +// +// Diagnostics code +// + +#ifdef FEATURE_EVENT_TRACE +// Tracks all surviving objects (moved or otherwise). 
+inline bool ShouldTrackSurvivorsForProfilerOrEtw() +{ + if (ETW::GCLog::ShouldTrackMovementForEtw()) + return true; + + return false; +} +#endif // FEATURE_EVENT_TRACE + +void GCToEEInterface::DiagWalkSurvivors(void* gcContext, bool fCompacting) +{ +#ifdef FEATURE_EVENT_TRACE + if (ShouldTrackSurvivorsForProfilerOrEtw()) + { + size_t context = 0; + ETW::GCLog::BeginMovedReferences(&context); + GCHeapUtilities::GetGCHeap()->DiagWalkSurvivorsWithType(gcContext, &WalkMovedReferences, (void*)context, walk_for_gc); + ETW::GCLog::EndMovedReferences(context); + } +#else + UNREFERENCED_PARAMETER(gcContext); +#endif // FEATURE_EVENT_TRACE +} + +void GCToEEInterface::DiagWalkUOHSurvivors(void* gcContext, int gen) +{ +#ifdef FEATURE_EVENT_TRACE + if (ShouldTrackSurvivorsForProfilerOrEtw()) + { + size_t context = 0; + ETW::GCLog::BeginMovedReferences(&context); + GCHeapUtilities::GetGCHeap()->DiagWalkSurvivorsWithType(gcContext, &WalkMovedReferences, (void*)context, walk_for_uoh, gen); + ETW::GCLog::EndMovedReferences(context); + } +#else + UNREFERENCED_PARAMETER(gcContext); +#endif // FEATURE_EVENT_TRACE +} + +void GCToEEInterface::DiagWalkBGCSurvivors(void* gcContext) +{ +#ifdef FEATURE_EVENT_TRACE + if (ShouldTrackSurvivorsForProfilerOrEtw()) + { + size_t context = 0; + ETW::GCLog::BeginMovedReferences(&context); + GCHeapUtilities::GetGCHeap()->DiagWalkSurvivorsWithType(gcContext, &WalkMovedReferences, (void*)context, walk_for_bgc); + ETW::GCLog::EndMovedReferences(context); + } +#else + UNREFERENCED_PARAMETER(gcContext); +#endif // FEATURE_EVENT_TRACE +} + +void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args) +{ + // CoreRT doesn't patch the write barrier like CoreCLR does, but it + // still needs to record the changes in the GC heap. + + bool is_runtime_suspended = args->is_runtime_suspended; + + switch (args->operation) + { + case WriteBarrierOp::StompResize: + // StompResize requires a new card table, a new lowest address, and + // a new highest address + assert(args->card_table != nullptr); + assert(args->lowest_address != nullptr); + assert(args->highest_address != nullptr); + + // We are sensitive to the order of writes here(more comments on this further in the method) + // In particular g_card_table must be written before writing the heap bounds. + // For platforms with weak memory ordering we will issue fences, for x64/x86 we are ok + // as long as compiler does not reorder these writes. + // That is unlikely since we have method calls in between. + // Just to be robust agains possible refactoring/inlining we will do a compiler-fenced store here. + VolatileStore(&g_card_table, args->card_table); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + assert(args->card_bundle_table != nullptr); + g_card_bundle_table = args->card_bundle_table; +#endif + + // IMPORTANT: managed heap segments may surround unmanaged/stack segments. In such cases adding another managed + // heap segment may put a stack/unmanaged write inside the new heap range. However the old card table would + // not cover it. Therefore we must ensure that the write barriers see the new table before seeing the new bounds. + // + // On architectures with strong ordering, we only need to prevent compiler reordering. + // Otherwise we put a process-wide fence here (so that we could use an ordinary read in the barrier) + +#if defined(HOST_ARM64) || defined(HOST_ARM) + if (!is_runtime_suspended) + { + // If runtime is not suspended, force all threads to see the changed table before seeing updated heap boundaries. 
+ // See: http://vstfdevdiv:8080/DevDiv2/DevDiv/_workitems/edit/346765 + FlushProcessWriteBuffers(); + } +#endif + + g_lowest_address = args->lowest_address; + g_highest_address = args->highest_address; + +#if defined(HOST_ARM64) || defined(HOST_ARM) + if (!is_runtime_suspended) + { + // If runtime is not suspended, force all threads to see the changed state before observing future allocations. + FlushProcessWriteBuffers(); + } +#endif + return; + case WriteBarrierOp::StompEphemeral: + // StompEphemeral requires a new ephemeral low and a new ephemeral high + assert(args->ephemeral_low != nullptr); + assert(args->ephemeral_high != nullptr); + g_ephemeral_low = args->ephemeral_low; + g_ephemeral_high = args->ephemeral_high; + return; + case WriteBarrierOp::Initialize: + // This operation should only be invoked once, upon initialization. + assert(g_card_table == nullptr); + assert(g_lowest_address == nullptr); + assert(g_highest_address == nullptr); + assert(args->card_table != nullptr); + assert(args->lowest_address != nullptr); + assert(args->highest_address != nullptr); + assert(args->ephemeral_low != nullptr); + assert(args->ephemeral_high != nullptr); + assert(args->is_runtime_suspended && "the runtime must be suspended here!"); + + g_card_table = args->card_table; + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + assert(g_card_bundle_table == nullptr); + g_card_bundle_table = args->card_bundle_table; +#endif + + g_lowest_address = args->lowest_address; + g_highest_address = args->highest_address; + g_ephemeral_low = args->ephemeral_low; + g_ephemeral_high = args->ephemeral_high; + return; + case WriteBarrierOp::SwitchToWriteWatch: + case WriteBarrierOp::SwitchToNonWriteWatch: + assert(!"CoreRT does not have an implementation of non-OS WriteWatch"); + return; + default: + assert(!"Unknokwn WriteBarrierOp enum"); + return; + } +} + +void GCToEEInterface::EnableFinalization(bool foundFinalizers) +{ + if (foundFinalizers) + RhEnableFinalization(); +} + +void GCToEEInterface::HandleFatalError(unsigned int exitCode) +{ + UNREFERENCED_PARAMETER(exitCode); + EEPOLICY_HANDLE_FATAL_ERROR(exitCode); +} + +bool GCToEEInterface::EagerFinalized(Object* obj) +{ + UNREFERENCED_PARAMETER(obj); + return false; +} + +bool GCToEEInterface::IsGCThread() +{ + Thread* pCurrentThread = ThreadStore::RawGetCurrentThread(); + return pCurrentThread->IsGCSpecial() || pCurrentThread == ThreadStore::GetSuspendingThread(); +} + +bool GCToEEInterface::WasCurrentThreadCreatedByGC() +{ + return ThreadStore::RawGetCurrentThread()->IsGCSpecial(); +} + +struct ThreadStubArguments +{ + void (*m_pRealStartRoutine)(void*); + void* m_pRealContext; + bool m_isSuspendable; + CLREventStatic m_ThreadStartedEvent; +}; + +bool GCToEEInterface::CreateThread(void (*threadStart)(void*), void* arg, bool is_suspendable, const char* name) +{ + UNREFERENCED_PARAMETER(name); + + ThreadStubArguments threadStubArgs; + + threadStubArgs.m_pRealStartRoutine = threadStart; + threadStubArgs.m_pRealContext = arg; + threadStubArgs.m_isSuspendable = is_suspendable; + + if (!threadStubArgs.m_ThreadStartedEvent.CreateAutoEventNoThrow(false)) + { + return false; + } + + // Helper used to wrap the start routine of background GC threads so we can do things like initialize the + // Redhawk thread state which requires running in the new thread's context. 
+ auto threadStub = [](void* argument) -> DWORD + { + ThreadStubArguments* pStartContext = (ThreadStubArguments*)argument; + + if (pStartContext->m_isSuspendable) + { + // Initialize the Thread for this thread. The false being passed indicates that the thread store lock + // should not be acquired as part of this operation. This is necessary because this thread is created in + // the context of a garbage collection and the lock is already held by the GC. + ASSERT(GCHeapUtilities::IsGCInProgress()); + + ThreadStore::AttachCurrentThread(false); + } + + ThreadStore::RawGetCurrentThread()->SetGCSpecial(true); + + auto realStartRoutine = pStartContext->m_pRealStartRoutine; + void* realContext = pStartContext->m_pRealContext; + + pStartContext->m_ThreadStartedEvent.Set(); + + STRESS_LOG_RESERVE_MEM(GC_STRESSLOG_MULTIPLY); + + realStartRoutine(realContext); + + return 0; + }; + + if (!PalStartBackgroundGCThread(threadStub, &threadStubArgs)) + { + threadStubArgs.m_ThreadStartedEvent.CloseEvent(); + return false; + } + + uint32_t res = threadStubArgs.m_ThreadStartedEvent.Wait(INFINITE, FALSE); + threadStubArgs.m_ThreadStartedEvent.CloseEvent(); + ASSERT(res == WAIT_OBJECT_0); + + return true; +} + +// CoreRT does not use async pinned handles +void GCToEEInterface::WalkAsyncPinnedForPromotion(Object* object, ScanContext* sc, promote_func* callback) +{ + UNREFERENCED_PARAMETER(object); + UNREFERENCED_PARAMETER(sc); + UNREFERENCED_PARAMETER(callback); +} + +void GCToEEInterface::WalkAsyncPinned(Object* object, void* context, void (*callback)(Object*, Object*, void*)) +{ + UNREFERENCED_PARAMETER(object); + UNREFERENCED_PARAMETER(context); + UNREFERENCED_PARAMETER(callback); +} + +IGCToCLREventSink* GCToEEInterface::EventSink() +{ + return &g_gcToClrEventSink; +} + +uint32_t GCToEEInterface::GetTotalNumSizedRefHandles() +{ + return -1; +} + +bool GCToEEInterface::AnalyzeSurvivorsRequested(int condemnedGeneration) +{ + return false; +} + +void GCToEEInterface::AnalyzeSurvivorsFinished(int condemnedGeneration) +{ +} + +void GCToEEInterface::VerifySyncTableEntry() +{ +} + +void GCToEEInterface::UpdateGCEventStatus(int currentPublicLevel, int currentPublicKeywords, int currentPrivateLevel, int currentPrivateKeywords) +{ + UNREFERENCED_PARAMETER(currentPublicLevel); + UNREFERENCED_PARAMETER(currentPublicKeywords); + UNREFERENCED_PARAMETER(currentPrivateLevel); + UNREFERENCED_PARAMETER(currentPrivateKeywords); + // TODO: Linux LTTng +} + +MethodTable* GCToEEInterface::GetFreeObjectMethodTable() +{ + assert(g_pFreeObjectEEType != nullptr); + return (MethodTable*)g_pFreeObjectEEType; +} + +bool GCToEEInterface::GetBooleanConfigValue(const char* privateKey, const char* publicKey, bool* value) +{ + // these configuration values are given to us via startup flags. 
+ if (strcmp(privateKey, "gcServer") == 0) + { + *value = g_heap_type == GC_HEAP_SVR; + return true; + } + + if (strcmp(privateKey, "gcConcurrent") == 0) + { + *value = !g_pRhConfig->GetDisableBGC(); + return true; + } + + if (strcmp(privateKey, "gcConservative") == 0) + { + *value = g_pConfig->GetGCConservative(); + return true; + } + + return false; +} + +bool GCToEEInterface::GetIntConfigValue(const char* privateKey, const char* publicKey, int64_t* value) +{ + if (strcmp(privateKey, "HeapVerify") == 0) + { + *value = g_pRhConfig->GetHeapVerify(); + return true; + } + + if (strcmp(privateKey, "GCgen0size") == 0) + { +#if defined(USE_PORTABLE_HELPERS) && !defined(HOST_WASM) + // CORERT-TODO: remove this + // https://github.com/dotnet/corert/issues/2033 + *value = 100 * 1024 * 1024; +#else + *value = 0; +#endif + return true; + } + + return false; +} + +bool GCToEEInterface::GetStringConfigValue(const char* privateKey, const char* publicKey, const char** value) +{ + UNREFERENCED_PARAMETER(privateKey); + UNREFERENCED_PARAMETER(publicKey); + UNREFERENCED_PARAMETER(value); + return false; +} + +void GCToEEInterface::FreeStringConfigValue(const char* value) +{ + delete[] value; +} + +#endif // !DACCESS_COMPILE + +// NOTE: this method is not in thread.cpp because it needs access to the layout of alloc_context for DAC to know the +// size, but thread.cpp doesn't generally need to include the GC environment headers for any other reason. +gc_alloc_context * Thread::GetAllocContext() +{ + return dac_cast(dac_cast(this) + offsetof(Thread, m_rgbAllocContextBuffer)); +} + +GPTR_IMPL(Thread, g_pFinalizerThread); +GPTR_IMPL(Thread, g_pGcThread); + +#ifndef DACCESS_COMPILE + +bool __SwitchToThread(uint32_t dwSleepMSec, uint32_t /*dwSwitchCount*/) +{ + if (dwSleepMSec > 0) + { + PalSleep(dwSleepMSec); + return true; + } + return !!PalSwitchToThread(); +} + +#endif // DACCESS_COMPILE + +void LogSpewAlways(const char * /*fmt*/, ...) +{ +} + +#if defined(FEATURE_EVENT_TRACE) && !defined(DACCESS_COMPILE) +ProfilingScanContext::ProfilingScanContext(BOOL fProfilerPinnedParam) + : ScanContext() +{ + pHeapId = NULL; + fProfilerPinned = fProfilerPinnedParam; + pvEtwContext = NULL; +#ifdef FEATURE_CONSERVATIVE_GC + // To not confuse GCScan::GcScanRoots + promotion = g_pConfig->GetGCConservative(); +#endif +} +#endif // defined(FEATURE_EVENT_TRACE) && !defined(DACCESS_COMPILE) diff --git a/src/coreclr/src/nativeaot/Runtime/gcrhinterface.h b/src/coreclr/src/nativeaot/Runtime/gcrhinterface.h new file mode 100644 index 0000000000000..15090c81f33e7 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcrhinterface.h @@ -0,0 +1,168 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This header contains the definition of an interface between the GC/HandleTable portions of the Redhawk +// codebase and the regular Redhawk code. The former has all sorts of legacy environmental requirements (see +// gcrhenv.h) that we don't wish to pull into the rest of Redhawk. +// +// Since this file is included in both worlds it has no dependencies and uses a very simple subset of types +// etc. so that it will build cleanly in both. The actual implementation of the class defined here is in +// gcrhenv.cpp, since the implementation needs access to the guts of the GC/HandleTable. +// +// This is just an initial stab at the interface. 
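
Editorial note, referring back to the gcrhenv.cpp hunk just above: Thread::GetAllocContext() is placed in that file, per its own comment, because it needs the layout of alloc_context that thread.cpp does not otherwise want to include. The underlying idiom is an opaque, suitably sized byte buffer embedded in one type that another component reinterprets as its own structure at a fixed offset. Below is a small self-contained sketch of that idiom with hypothetical names; the real code additionally routes through dac_cast so the same source works under DAC.

// Editor's sketch (hypothetical types): expose an embedded opaque buffer as a
// typed pointer, the way Thread::GetAllocContext() exposes m_rgbAllocContextBuffer.
#include <cstdint>
#include <cstddef>

struct SketchAllocContext      // stands in for gc_alloc_context
{
    uint8_t* alloc_ptr;
    uint8_t* alloc_limit;
};

struct SketchThread
{
    // Other thread state would live here; the buffer is opaque to this header.
    alignas(void*) uint8_t m_rgbAllocContextBuffer[sizeof(SketchAllocContext)];

    SketchAllocContext* GetAllocContext()
    {
        // Same shape as the real code: base address + field offset, reinterpreted.
        return reinterpret_cast<SketchAllocContext*>(
            reinterpret_cast<uint8_t*>(this) + offsetof(SketchThread, m_rgbAllocContextBuffer));
    }
};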
+// + +#ifndef __GCRHINTERFACE_INCLUDED +#define __GCRHINTERFACE_INCLUDED + +#ifndef DACCESS_COMPILE +// Global data cells exported by the GC. +extern "C" unsigned char *g_ephemeral_low; +extern "C" unsigned char *g_ephemeral_high; +extern "C" unsigned char *g_lowest_address; +extern "C" unsigned char *g_highest_address; +#endif + +struct gc_alloc_context; +class MethodInfo; +struct REGDISPLAY; +class Thread; +enum GCRefKind : unsigned char; +class ICodeManager; +class EEType; + +// ----------------------------------------------------------------------------------------------------------- +// RtuObjectRef +// ----------------------------------------------------------------------------------------------------------- +// +// READ THIS! +// +// This struct exists for type description purposes, but you must never directly refer to the object +// reference. The only code allowed to do this is the code inherited directly from the CLR, which all +// includes gcrhenv.h. If your code is outside the namespace of gcrhenv.h, direct object reference +// manipulation is prohibited--use C# instead. +// +// To enforce this, we declare RtuObjectRef as a class with no public members. +// +class RtuObjectRef +{ +#ifndef DACCESS_COMPILE +private: +#else +public: +#endif + TADDR pvObject; +}; + +typedef DPTR(RtuObjectRef) PTR_RtuObjectRef; + +// ----------------------------------------------------------------------------------------------------------- + +// We provide various ways to enumerate GC objects or roots, each of which calls back to a user supplied +// function for each object (within the context of a garbage collection). The following function types +// describe these callbacks. Unfortunately the signatures aren't very specific: we don't want to reference +// Object* or Object** from this module, see the comment for RtuObjectRef, but this very narrow category of +// callers can't use RtuObjectRef (they really do need to drill down into the Object). The lesser evil here is +// to be a bit loose in the signature rather than exposing the Object class to the rest of Redhawk. + +// Callback when enumerating objects on the GC heap or objects referenced from instance fields of another +// object. The GC dictates the shape of this signature (we're hijacking functionality originally developed for +// profiling). The real signature is: +// int ScanFunction(Object* pObject, void* pContext) +// where: +// return : treated as a boolean, zero indicates the enumeration should terminate, all other values +// say continue +// pObject : pointer to the current object being scanned +// pContext : user context passed to the original scan function and otherwise uninterpreted +typedef int (*GcScanObjectFunction)(void*, void*); + +// Callback when enumerating GC roots (stack locations, statics and handles). Similar to the callback above +// except there is no means to terminate the scan (no return value) and the root location (pointer to pointer +// to object) is returned instead of a direct pointer to the object: +// void ScanFunction(Object** pRoot, void* pContext) +typedef void (*GcScanRootFunction)(void**, void*); + +typedef void * GcSegmentHandle; + +#define RH_LARGE_OBJECT_SIZE 85000 + +// A 'clump' is defined as the size of memory covered by 1 byte in the card table. These constants are +// verified against gcpriv.h in gcrhee.cpp. 
+#if (POINTER_SIZE == 8) +#define CLUMP_SIZE 0x800 +#define LOG2_CLUMP_SIZE 11 +#elif (POINTER_SIZE == 4) +#define CLUMP_SIZE 0x400 +#define LOG2_CLUMP_SIZE 10 +#else +#error unexpected pointer size +#endif + +class RedhawkGCInterface +{ +public: + // Perform any runtime-startup initialization needed by the GC, HandleTable or environmental code in + // gcrhenv. Returns true on success or false if a subsystem failed to initialize. + static bool InitializeSubsystems(); + + static void InitAllocContext(gc_alloc_context * pAllocContext); + static void ReleaseAllocContext(gc_alloc_context * pAllocContext); + + static void WaitForGCCompletion(); + + static void EnumGcRef(PTR_RtuObjectRef pRef, GCRefKind kind, void * pfnEnumCallback, void * pvCallbackData); + + static void BulkEnumGcObjRef(PTR_RtuObjectRef pRefs, UInt32 cRefs, void * pfnEnumCallback, void * pvCallbackData); + + static void EnumGcRefs(ICodeManager * pCodeManager, + MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + void * pfnEnumCallback, + void * pvCallbackData); + + static void EnumGcRefsInRegionConservatively(PTR_RtuObjectRef pLowerBound, + PTR_RtuObjectRef pUpperBound, + void * pfnEnumCallback, + void * pvCallbackData); + + static GcSegmentHandle RegisterFrozenSegment(void * pSection, size_t SizeSection); + static void UnregisterFrozenSegment(GcSegmentHandle segment); + +#ifdef FEATURE_GC_STRESS + static void StressGc(); +#endif // FEATURE_GC_STRESS + + // Various routines used to enumerate objects contained within a given scope (on the GC heap, as reference + // fields of an object, on a thread stack, in a static or in one of the handle tables). + static void ScanObject(void *pObject, GcScanObjectFunction pfnScanCallback, void *pContext); + static void ScanStackRoots(Thread *pThread, GcScanRootFunction pfnScanCallback, void *pContext); + static void ScanStaticRoots(GcScanRootFunction pfnScanCallback, void *pContext); + static void ScanHandleTableRoots(GcScanRootFunction pfnScanCallback, void *pContext); + + // Returns size GCDesc. Used by type cloning. + static UInt32 GetGCDescSize(void * pType); + + // These methods are used to get and set the type information for the last allocation on each thread. + static EEType * GetLastAllocEEType(); + static void SetLastAllocEEType(EEType *pEEType); + + static uint64_t GetDeadThreadsNonAllocBytes(); + + // Used by debugger hook + static void* CreateTypedHandle(void* object, int type); + static void DestroyTypedHandle(void* handle); + +private: + // The EEType for the last allocation. This value is used inside of the GC allocator + // to emit allocation ETW events with type information. We set this value unconditionally to avoid + // race conditions where ETW is enabled after the value is set. + DECLSPEC_THREAD static EEType * tls_pLastAllocationEEType; + + // Tracks the amount of bytes that were reserved for threads in their gc_alloc_context and went unused when they died. + // Used for GC.GetTotalAllocatedBytes + static uint64_t s_DeadThreadsNonAllocBytes; +}; + +#endif // __GCRHINTERFACE_INCLUDED diff --git a/src/coreclr/src/nativeaot/Runtime/gcrhscan.cpp b/src/coreclr/src/nativeaot/Runtime/gcrhscan.cpp new file mode 100644 index 0000000000000..8496e20ad09a9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gcrhscan.cpp @@ -0,0 +1,176 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
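
Editorial note: the GcScanObjectFunction / GcScanRootFunction typedefs above deliberately use void* so this header never has to name Object. A consumer of RedhawkGCInterface supplies a callback matching the documented shape, void ScanFunction(Object** pRoot, void* pContext), and threads its own state through the context pointer. The following is a minimal hypothetical consumer; as the comments above the Scan* methods state, such calls are only legal from within a GC.

// Editor's sketch: a GcScanRootFunction-shaped callback that just counts root slots.
#include <cstddef>

struct RootCountContext
{
    size_t cRoots;
};

// Matches GcScanRootFunction: void (*)(void**, void*)
static void CountRootCallback(void** /*pRoot*/, void* pContext)
{
    ((RootCountContext*)pContext)->cRoots++;
}

// Hypothetical use, valid only while the GC has the world stopped:
//
//   RootCountContext ctx = { 0 };
//   RedhawkGCInterface::ScanStackRoots(pSomeThread, CountRootCallback, &ctx);
//   // ctx.cRoots now holds the number of stack root slots reported for pSomeThread.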
+#include "common.h" + +#include "gcenv.h" +#include "gcheaputilities.h" +#include "objecthandle.h" + +#include "gcenv.ee.h" + +#include "PalRedhawkCommon.h" + +#include "gcrhinterface.h" + +#include "slist.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" + +#include "thread.h" + +#include "shash.h" +#include "RWLock.h" +#include "RuntimeInstance.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" + +#include "DebuggerHook.h" + +#ifndef DACCESS_COMPILE + +void GcEnumObjectsConservatively(PTR_PTR_Object ppLowerBound, PTR_PTR_Object ppUpperBound, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc); + +void EnumAllStaticGCRefs(EnumGcRefCallbackFunc * fn, EnumGcRefScanContext * sc) +{ + GetRuntimeInstance()->EnumAllStaticGCRefs(reinterpret_cast(fn), sc); +} + +/* + * Scan all stack and statics roots + */ + +void GCToEEInterface::GcScanRoots(EnumGcRefCallbackFunc * fn, int condemned, int max_gen, EnumGcRefScanContext * sc) +{ + DebuggerProtectedBufferListNode* cursor = DebuggerHook::s_debuggerProtectedBuffers; + while (cursor != nullptr) + { + GcEnumObjectsConservatively((PTR_PTR_Object)cursor->address, (PTR_PTR_Object)(cursor->address + cursor->size), fn, sc); + cursor = cursor->next; + } + + // STRESS_LOG1(LF_GCROOTS, LL_INFO10, "GCScan: Phase = %s\n", sc->promotion ? "promote" : "relocate"); + + FOREACH_THREAD(pThread) + { + // Skip "GC Special" threads which are really background workers that will never have any roots. + if (pThread->IsGCSpecial()) + continue; + +#if !defined (ISOLATED_HEAPS) + // @TODO: it is very bizarre that this IsThreadUsingAllocationContextHeap takes a copy of the + // allocation context instead of a reference or a pointer to it. This seems very wasteful given how + // large the alloc_context is. 
+ if (!GCHeapUtilities::GetGCHeap()->IsThreadUsingAllocationContextHeap(pThread->GetAllocContext(), + sc->thread_number)) + { + // STRESS_LOG2(LF_GC|LF_GCROOTS, LL_INFO100, "{ Scan of Thread %p (ID = %x) declined by this heap\n", + // pThread, pThread->GetThreadId()); + } + else +#endif + { + STRESS_LOG1(LF_GC|LF_GCROOTS, LL_INFO100, "{ Starting scan of Thread %p\n", pThread); + sc->thread_under_crawl = pThread; +#if defined(FEATURE_EVENT_TRACE) && !defined(DACCESS_COMPILE) + sc->dwEtwRootKind = kEtwGCRootKindStack; +#endif + pThread->GcScanRoots(reinterpret_cast(fn), sc); + +#if defined(FEATURE_EVENT_TRACE) && !defined(DACCESS_COMPILE) + sc->dwEtwRootKind = kEtwGCRootKindOther; +#endif + STRESS_LOG1(LF_GC|LF_GCROOTS, LL_INFO100, "Ending scan of Thread %p }\n", pThread); + } + } + END_FOREACH_THREAD + + sc->thread_under_crawl = NULL; + + if ((!GCHeapUtilities::IsServerHeap() || sc->thread_number == 0) ||(condemned == max_gen && sc->promotion)) + { +#if defined(FEATURE_EVENT_TRACE) && !defined(DACCESS_COMPILE) + sc->dwEtwRootKind = kEtwGCRootKindHandle; +#endif + EnumAllStaticGCRefs(fn, sc); + } +} + +void GCToEEInterface::GcEnumAllocContexts (enum_alloc_context_func* fn, void* param) +{ + FOREACH_THREAD(thread) + { + (*fn) (thread->GetAllocContext(), param); + } + END_FOREACH_THREAD +} + +#endif //!DACCESS_COMPILE + +void PromoteCarefully(PTR_PTR_Object obj, UInt32 flags, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc) +{ + // + // Sanity check that the flags contain only these three values + // + assert((flags & ~(GC_CALL_INTERIOR|GC_CALL_PINNED|GC_CALL_CHECK_APP_DOMAIN)) == 0); + + // + // Sanity check that GC_CALL_INTERIOR FLAG is set + // + assert(flags & GC_CALL_INTERIOR); + + // If the object reference points into the stack, we + // must not promote it, the GC cannot handle these. + if (pSc->thread_under_crawl->IsWithinStackBounds(*obj)) + return; + + fnGcEnumRef(obj, pSc, flags); +} + +void GcEnumObject(PTR_PTR_Object ppObj, UInt32 flags, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc) +{ + // + // Sanity check that the flags contain only these three values + // + assert((flags & ~(GC_CALL_INTERIOR|GC_CALL_PINNED|GC_CALL_CHECK_APP_DOMAIN)) == 0); + + // for interior pointers, we optimize the case in which + // it points into the current threads stack area + // + if (flags & GC_CALL_INTERIOR) + PromoteCarefully (ppObj, flags, fnGcEnumRef, pSc); + else + fnGcEnumRef(ppObj, pSc, flags); +} + +void GcBulkEnumObjects(PTR_PTR_Object pObjs, UInt32 cObjs, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc) +{ + PTR_PTR_Object ppObj = pObjs; + + for (UInt32 i = 0; i < cObjs; i++) + fnGcEnumRef(ppObj++, pSc, 0); +} + +// Scan a contiguous range of memory and report everything that looks like it could be a GC reference as a +// pinned interior reference. Pinned in case we are wrong (so the GC won't try to move the object and thus +// corrupt the original memory value by relocating it). Interior since we (a) can't easily tell whether a +// real reference is interior or not and interior is the more conservative choice that will work for both and +// (b) because it might not be a real GC reference at all and in that case falsely listing the reference as +// non-interior will cause the GC to make assumptions and crash quite quickly. +void GcEnumObjectsConservatively(PTR_PTR_Object ppLowerBound, PTR_PTR_Object ppUpperBound, EnumGcRefCallbackFunc * fnGcEnumRef, EnumGcRefScanContext * pSc) +{ + // Only report potential references in the promotion phase. 
Since we report everything as pinned there + // should be no work to do in the relocation phase. + if (pSc->promotion) + { + for (PTR_PTR_Object ppObj = ppLowerBound; ppObj < ppUpperBound; ppObj++) + { + // Only report values that lie in the GC heap range. This doesn't conclusively guarantee that the + // value is a GC heap reference but it's a cheap check that weeds out a lot of spurious values. + PTR_Object pObj = *ppObj; + if (((PTR_UInt8)pObj >= g_lowest_address) && ((PTR_UInt8)pObj <= g_highest_address)) + fnGcEnumRef(ppObj, pSc, GC_CALL_INTERIOR|GC_CALL_PINNED); + } + } +} diff --git a/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.cpp b/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.cpp new file mode 100644 index 0000000000000..8838b3461007b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.cpp @@ -0,0 +1,312 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "gctoclreventsink.h" + +GCToCLREventSink g_gcToClrEventSink; + +void GCToCLREventSink::FireDynamicEvent(const char* eventName, void* payload, uint32_t payloadSize) +{ + LIMITED_METHOD_CONTRACT; + +#ifndef FEATURE_REDHAWK + const size_t EventNameMaxSize = 255; + + WCHAR wideEventName[EventNameMaxSize]; + if (MultiByteToWideChar(CP_ACP, 0, eventName, -1, wideEventName, EventNameMaxSize) == 0) + { + return; + } + + FireEtwGCDynamicEvent(wideEventName, payloadSize, (const BYTE*)payload, GetClrInstanceId()); +#endif // !FEATURE_REDHAWK +} + +void GCToCLREventSink::FireGCStart_V2(uint32_t count, uint32_t depth, uint32_t reason, uint32_t type) +{ + LIMITED_METHOD_CONTRACT; + +#ifdef FEATURE_ETW + ETW::GCLog::ETW_GC_INFO gcStartInfo; + gcStartInfo.GCStart.Count = count; + gcStartInfo.GCStart.Depth = depth; + gcStartInfo.GCStart.Reason = static_cast(reason); + gcStartInfo.GCStart.Type = static_cast(type); + ETW::GCLog::FireGcStart(&gcStartInfo); +#endif // FEATURE_ETW +} + +void GCToCLREventSink::FireGCGenerationRange(uint8_t generation, void* rangeStart, uint64_t rangeUsedLength, uint64_t rangeReservedLength) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCGenerationRange(generation, rangeStart, rangeUsedLength, rangeReservedLength, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCEnd_V1(uint32_t count, uint32_t depth) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCEnd_V1(count, depth, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCHeapStats_V2( + uint64_t generationSize0, + uint64_t totalPromotedSize0, + uint64_t generationSize1, + uint64_t totalPromotedSize1, + uint64_t generationSize2, + uint64_t totalPromotedSize2, + uint64_t generationSize3, + uint64_t totalPromotedSize3, + uint64_t generationSize4, + uint64_t totalPromotedSize4, + uint64_t finalizationPromotedSize, + uint64_t finalizationPromotedCount, + uint32_t pinnedObjectCount, + uint32_t sinkBlockCount, + uint32_t gcHandleCount) +{ + LIMITED_METHOD_CONTRACT; + + // TODO: FireEtwGCHeapStats_V2 + FireEtwGCHeapStats_V1(generationSize0, totalPromotedSize0, generationSize1, totalPromotedSize1, + generationSize2, totalPromotedSize2, generationSize3, totalPromotedSize3, + finalizationPromotedSize, finalizationPromotedCount, pinnedObjectCount, + sinkBlockCount, gcHandleCount, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCCreateSegment_V1(void* address, size_t size, uint32_t type) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCCreateSegment_V1((uint64_t)address, static_cast(size), type, GetClrInstanceId()); +} + +void 
GCToCLREventSink::FireGCFreeSegment_V1(void* address) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCFreeSegment_V1((uint64_t)address, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCCreateConcurrentThread_V1() +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCCreateConcurrentThread_V1(GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCTerminateConcurrentThread_V1() +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCTerminateConcurrentThread_V1(GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCTriggered(uint32_t reason) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCTriggered(reason, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCMarkWithType(uint32_t heapNum, uint32_t type, uint64_t bytes) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCMarkWithType(heapNum, GetClrInstanceId(), type, bytes); +} + +void GCToCLREventSink::FireGCJoin_V2(uint32_t heap, uint32_t joinTime, uint32_t joinType, uint32_t joinId) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCJoin_V2(heap, joinTime, joinType, GetClrInstanceId(), joinId); +} + +void GCToCLREventSink::FireGCGlobalHeapHistory_V3(uint64_t finalYoungestDesired, + int32_t numHeaps, + uint32_t condemnedGeneration, + uint32_t gen0reductionCount, + uint32_t reason, + uint32_t globalMechanisms, + uint32_t pauseMode, + uint32_t memoryPressure, + uint32_t condemnReasons0, + uint32_t condemnReasons1) +{ + LIMITED_METHOD_CONTRACT; + + // TODO: FireEtwGCGlobalHeapHistory_V3 + FireEtwGCGlobalHeapHistory_V2(finalYoungestDesired, numHeaps, condemnedGeneration, gen0reductionCount, reason, + globalMechanisms, GetClrInstanceId(), pauseMode, memoryPressure); +} + +void GCToCLREventSink::FireGCAllocationTick_V1(uint32_t allocationAmount, uint32_t allocationKind) +{ + LIMITED_METHOD_CONTRACT; + + FireEtwGCAllocationTick_V1(allocationAmount, allocationKind, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCAllocationTick_V3(uint64_t allocationAmount, uint32_t allocationKind, uint32_t heapIndex, void* objectAddress) +{ + LIMITED_METHOD_CONTRACT; + + void * typeId = RedhawkGCInterface::GetLastAllocEEType(); + const WCHAR * name = nullptr; + + if (typeId != nullptr) + { + FireEtwGCAllocationTick_V3(static_cast(allocationAmount), + allocationKind, + GetClrInstanceId(), + allocationAmount, + typeId, + name, + heapIndex, + objectAddress); + } +} + +void GCToCLREventSink::FirePinObjectAtGCTime(void* object, uint8_t** ppObject) +{ + UNREFERENCED_PARAMETER(object); + UNREFERENCED_PARAMETER(ppObject); +} + +void GCToCLREventSink::FirePinPlugAtGCTime(uint8_t* plugStart, uint8_t* plugEnd, uint8_t* gapBeforeSize) +{ + FireEtwPinPlugAtGCTime(plugStart, plugEnd, gapBeforeSize, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCPerHeapHistory_V3(void *freeListAllocated, + void *freeListRejected, + void *endOfSegAllocated, + void *condemnedAllocated, + void *pinnedAllocated, + void *pinnedAllocatedAdvance, + uint32_t runningFreeListEfficiency, + uint32_t condemnReasons0, + uint32_t condemnReasons1, + uint32_t compactMechanisms, + uint32_t expandMechanisms, + uint32_t heapIndex, + void *extraGen0Commit, + uint32_t count, + uint32_t valuesLen, + void *values) +{ + FireEtwGCPerHeapHistory_V3(GetClrInstanceId(), + freeListAllocated, + freeListRejected, + endOfSegAllocated, + condemnedAllocated, + pinnedAllocated, + pinnedAllocatedAdvance, + runningFreeListEfficiency, + condemnReasons0, + condemnReasons1, + compactMechanisms, + expandMechanisms, + heapIndex, + extraGen0Commit, + count, + valuesLen, + values); +} + + + +void GCToCLREventSink::FireBGCBegin() +{ + 
FireEtwBGCBegin(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC1stNonConEnd() +{ + FireEtwBGC1stNonConEnd(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC1stConEnd() +{ + FireEtwBGC1stConEnd(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC1stSweepEnd(uint32_t genNumber) +{ + //FireEtwBGC1stSweepEnd(genNumber, GetClrInstanceId()); TODO +} + +void GCToCLREventSink::FireBGC2ndNonConBegin() +{ + FireEtwBGC2ndNonConBegin(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC2ndNonConEnd() +{ + FireEtwBGC2ndNonConEnd(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC2ndConBegin() +{ + FireEtwBGC2ndConBegin(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGC2ndConEnd() +{ + FireEtwBGC2ndConEnd(GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGCDrainMark(uint64_t objects) +{ + FireEtwBGCDrainMark(objects, GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGCRevisit(uint64_t pages, uint64_t objects, uint32_t isLarge) +{ + FireEtwBGCRevisit(pages, objects, isLarge, GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGCOverflow_V1(uint64_t min, uint64_t max, uint64_t objects, uint32_t isLarge, uint32_t genNumber) +{ + // TODO: FireBGCOverflow_V1 + FireEtwBGCOverflow(min, max, objects, isLarge, GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGCAllocWaitBegin(uint32_t reason) +{ + FireEtwBGCAllocWaitBegin(reason, GetClrInstanceId()); +} + +void GCToCLREventSink::FireBGCAllocWaitEnd(uint32_t reason) +{ + FireEtwBGCAllocWaitEnd(reason, GetClrInstanceId()); +} + +void GCToCLREventSink::FireGCFullNotify_V1(uint32_t genNumber, uint32_t isAlloc) +{ + FireEtwGCFullNotify_V1(genNumber, isAlloc, GetClrInstanceId()); +} + +void GCToCLREventSink::FireSetGCHandle(void* handleID, void* objectID, uint32_t kind, uint32_t generation) +{ + FireEtwSetGCHandle(handleID, objectID, kind, generation, -1, GetClrInstanceId()); +} + +void GCToCLREventSink::FirePrvSetGCHandle(void* handleID, void* objectID, uint32_t kind, uint32_t generation) +{ + FireEtwPrvSetGCHandle(handleID, objectID, kind, generation, -1, GetClrInstanceId()); +} + +void GCToCLREventSink::FireDestroyGCHandle(void *handleID) +{ + FireEtwDestroyGCHandle(handleID, GetClrInstanceId()); +} + +void GCToCLREventSink::FirePrvDestroyGCHandle(void *handleID) +{ + FireEtwPrvDestroyGCHandle(handleID, GetClrInstanceId()); +} diff --git a/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.h b/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.h new file mode 100644 index 0000000000000..1611f71250dbf --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/gctoclreventsink.h @@ -0,0 +1,92 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
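
Editorial note: FireGCAllocationTick_V3 above only has a type to report because RedhawkGCInterface keeps the per-thread tls_pLastAllocationEEType that the allocation slow path records via SetLastAllocEEType (see the gcrhenv.cpp hunk earlier in this patch, which notes the value is set unconditionally so that enabling ETW later still observes it); the sink simply reads it back with GetLastAllocEEType when the tick fires. A compact sketch of that "record at allocation time, consume at event time" pattern, with hypothetical names:

// Editor's sketch (hypothetical names): per-thread "last allocated type" handoff
// between an allocation slow path and an event-firing hook.
#include <cstdint>
#include <cstdio>

struct SketchTypeHandle { const char* name; };

// One slot per thread; written unconditionally so enabling events later still sees a value.
static thread_local const SketchTypeHandle* t_lastAllocatedType = nullptr;

void OnSlowAllocation(const SketchTypeHandle* pType, size_t /*cbSize*/)
{
    t_lastAllocatedType = pType;     // record, as the real slow path does
    // ... actual allocation work would happen here ...
}

void FireAllocationTickSketch(uint64_t allocationAmount)
{
    const SketchTypeHandle* pType = t_lastAllocatedType;
    if (pType != nullptr)
    {
        // Stand-in for FireEtwGCAllocationTick_V3(...)
        printf("AllocationTick: %llu bytes, last type = %s\n",
               (unsigned long long)allocationAmount, pType->name);
    }
}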
+ +#ifndef __GCTOCLREVENTSINK_H__ +#define __GCTOCLREVENTSINK_H__ + +#include "gcenv.h" +#include "gcinterface.h" + +class GCToCLREventSink : public IGCToCLREventSink +{ +public: + void FireDynamicEvent(const char* eventName, void* payload, uint32_t payloadSize); + void FireGCStart_V2(uint32_t count, uint32_t depth, uint32_t reason, uint32_t type); + void FireGCEnd_V1(uint32_t count, uint32_t depth); + void FireGCGenerationRange(uint8_t generation, void* rangeStart, uint64_t rangeUsedLength, uint64_t rangeReservedLength); + void FireGCHeapStats_V2(uint64_t generationSize0, + uint64_t totalPromotedSize0, + uint64_t generationSize1, + uint64_t totalPromotedSize1, + uint64_t generationSize2, + uint64_t totalPromotedSize2, + uint64_t generationSize3, + uint64_t totalPromotedSize3, + uint64_t generationSize4, + uint64_t totalPromotedSize4, + uint64_t finalizationPromotedSize, + uint64_t finalizationPromotedCount, + uint32_t pinnedObjectCount, + uint32_t sinkBlockCount, + uint32_t gcHandleCount); + void FireGCCreateSegment_V1(void* address, size_t size, uint32_t type); + void FireGCFreeSegment_V1(void* address); + void FireGCCreateConcurrentThread_V1(); + void FireGCTerminateConcurrentThread_V1(); + void FireGCTriggered(uint32_t reason); + void FireGCMarkWithType(uint32_t heapNum, uint32_t type, uint64_t bytes); + void FireGCJoin_V2(uint32_t heap, uint32_t joinTime, uint32_t joinType, uint32_t joinId); + void FireGCGlobalHeapHistory_V3(uint64_t finalYoungestDesired, + int32_t numHeaps, + uint32_t condemnedGeneration, + uint32_t gen0reductionCount, + uint32_t reason, + uint32_t globalMechanisms, + uint32_t pauseMode, + uint32_t memoryPressure, + uint32_t condemnReasons0, + uint32_t condemnReasons1); + void FireGCAllocationTick_V1(uint32_t allocationAmount, uint32_t allocationKind); + void FireGCAllocationTick_V3(uint64_t allocationAmount, uint32_t allocationKind, uint32_t heapIndex, void* objectAddress); + void FirePinObjectAtGCTime(void* object, uint8_t** ppObject); + void FirePinPlugAtGCTime(uint8_t* plug_start, uint8_t* plug_end, uint8_t* gapBeforeSize); + void FireGCPerHeapHistory_V3(void *freeListAllocated, + void *freeListRejected, + void *endOfSegAllocated, + void *condemnedAllocated, + void *pinnedAllocated, + void *pinnedAllocatedAdvance, + uint32_t runningFreeListEfficiency, + uint32_t condemnReasons0, + uint32_t condemnReasons1, + uint32_t compactMechanisms, + uint32_t expandMechanisms, + uint32_t heapIndex, + void *extraGen0Commit, + uint32_t count, + uint32_t valuesLen, + void *values); + void FireBGCBegin(); + void FireBGC1stNonConEnd(); + void FireBGC1stConEnd(); + void FireBGC1stSweepEnd(uint32_t genNumber); + void FireBGC2ndNonConBegin(); + void FireBGC2ndNonConEnd(); + void FireBGC2ndConBegin(); + void FireBGC2ndConEnd(); + void FireBGCDrainMark(uint64_t objects); + void FireBGCRevisit(uint64_t pages, uint64_t objects, uint32_t isLarge); + void FireBGCOverflow_V1(uint64_t min, uint64_t max, uint64_t objects, uint32_t isLarge, uint32_t genNumber); + void FireBGCAllocWaitBegin(uint32_t reason); + void FireBGCAllocWaitEnd(uint32_t reason); + void FireGCFullNotify_V1(uint32_t genNumber, uint32_t isAlloc); + void FireSetGCHandle(void *handleID, void *objectID, uint32_t kind, uint32_t generation); + void FirePrvSetGCHandle(void *handleID, void *objectID, uint32_t kind, uint32_t generation); + void FireDestroyGCHandle(void *handleID); + void FirePrvDestroyGCHandle(void *handleID); +}; + +extern GCToCLREventSink g_gcToClrEventSink; + +#endif // __GCTOCLREVENTSINK_H__ + diff --git 
a/src/coreclr/src/nativeaot/Runtime/holder.h b/src/coreclr/src/nativeaot/Runtime/holder.h new file mode 100644 index 0000000000000..a894ab32e1f0c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/holder.h @@ -0,0 +1,183 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// ----------------------------------------------------------------------------------------------------------- +// Cut down versions of the Holder and Wrapper template classes used in the CLR. If this coding pattern is +// also common in the Redhawk code then it might be worth investigating pulling the whole holder.h header file +// over (a quick look indicates it might not drag in too many extra dependencies). +// + +// ----------------------------------------------------------------------------------------------------------- +// This version of holder does not have a default constructor. + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define EQUALS_DEFAULT +#else +#define EQUALS_DEFAULT = default +#endif + +template +class HolderNoDefaultValue +{ +public: + HolderNoDefaultValue(TYPE value, bool fTake = true) : m_value(value), m_held(false) + { if (fTake) { ACQUIRE_FUNC(value); m_held = true; } } + + ~HolderNoDefaultValue() { if (m_held) RELEASE_FUNC(m_value); } + + TYPE GetValue() { return m_value; } + + void Acquire() { ACQUIRE_FUNC(m_value); m_held = true; } + void Release() { if (m_held) { RELEASE_FUNC(m_value); m_held = false; } } + void SuppressRelease() { m_held = false; } + TYPE Extract() { m_held = false; return GetValue(); } + + HolderNoDefaultValue(HolderNoDefaultValue && other) EQUALS_DEFAULT; + +protected: + TYPE m_value; + bool m_held; + +private: + // No one should be copying around holder types. + HolderNoDefaultValue & operator=(const HolderNoDefaultValue & other); + HolderNoDefaultValue(const HolderNoDefaultValue & other); +}; + +// ----------------------------------------------------------------------------------------------------------- +template +class Holder : public HolderNoDefaultValue +{ + typedef HolderNoDefaultValue MY_PARENT; +public: + Holder() : MY_PARENT(DEFAULTVALUE, false) {} + Holder(TYPE value, bool fTake = true) : MY_PARENT(value, fTake) {} + + Holder(Holder && other) EQUALS_DEFAULT; + +private: + // No one should be copying around holder types. + Holder & operator=(const Holder & other); + Holder(const Holder & other); +}; + +// ----------------------------------------------------------------------------------------------------------- +template +class Wrapper : public Holder +{ + typedef Holder MY_PARENT; + +public: + Wrapper() : MY_PARENT() {} + Wrapper(TYPE value, bool fTake = true) : MY_PARENT(value, fTake) {} + Wrapper(Wrapper && other) EQUALS_DEFAULT; + + FORCEINLINE TYPE& operator=(TYPE const & value) + { + MY_PARENT::Release(); + MY_PARENT::m_value = value; + MY_PARENT::Acquire(); + return MY_PARENT::m_value; + } + + FORCEINLINE const TYPE &operator->() { return MY_PARENT::m_value; } + FORCEINLINE const TYPE &operator*() { return MY_PARENT::m_value; } + FORCEINLINE operator TYPE() { return MY_PARENT::m_value; } + +private: + // No one should be copying around wrapper types. 
+ Wrapper & operator=(const Wrapper & other); + Wrapper(const Wrapper & other); +}; + +// ----------------------------------------------------------------------------------------------------------- +template +FORCEINLINE void DoNothing(TYPE /*value*/) +{ +} + +// ----------------------------------------------------------------------------------------------------------- +template +FORCEINLINE void Delete(TYPE *value) +{ + delete value; +} + +// ----------------------------------------------------------------------------------------------------------- +template , + void (*RELEASE_FUNC)(PTR_TYPE) = Delete, + PTR_TYPE NULL_VAL = nullptr, + typename BASE = Wrapper > +class NewHolder : public BASE +{ +public: + NewHolder(PTR_TYPE p = NULL_VAL) : BASE(p) + { } + + PTR_TYPE& operator=(PTR_TYPE p) + { return BASE::operator=(p); } + + bool IsNull() + { return BASE::GetValue() == NULL_VAL; } +}; + +//----------------------------------------------------------------------------- +// NewArrayHolder : New []'ed pointer holder +// { +// NewArrayHolder foo = new (nothrow) Foo [30]; +// } // delete [] foo on out of scope +//----------------------------------------------------------------------------- + +template +FORCEINLINE void DeleteArray(TYPE *value) +{ + delete [] value; + value = NULL; +} + +template , + void (*RELEASE_FUNC)(PTR_TYPE) = DeleteArray, + PTR_TYPE NULL_VAL = nullptr, + typename BASE = Wrapper > +class NewArrayHolder : public BASE +{ +public: + NewArrayHolder(PTR_TYPE p = NULL_VAL) : BASE(p) + { } + + PTR_TYPE& operator=(PTR_TYPE p) + { return BASE::operator=(p); } + + bool IsNull() + { return BASE::GetValue() == NULL_VAL; } +}; + +// ----------------------------------------------------------------------------------------------------------- +template +FORCEINLINE void Destroy(TYPE * value) +{ + value->Destroy(); +} + +// ----------------------------------------------------------------------------------------------------------- +template , + void (*RELEASE_FUNC)(PTR_TYPE) = Destroy, + PTR_TYPE NULL_VAL = nullptr, + typename BASE = Wrapper > +class CreateHolder : public BASE +{ +public: + CreateHolder(PTR_TYPE p = NULL_VAL) : BASE(p) + { } + + PTR_TYPE& operator=(PTR_TYPE p) + { return BASE::operator=(p); } +}; + + diff --git a/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.S b/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.asm b/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.asm new file mode 100644 index 0000000000000..505661a8df519 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/AllocFast.asm @@ -0,0 +1,445 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +;; allocation context then automatically fallback to the slow allocation path. 
+;; ECX == EEType +FASTCALL_FUNC RhpNewFast, 4 + + ;; edx = GetThread(), TRASHES eax + INLINE_GETTHREAD edx, eax + + ;; + ;; ecx contains EEType pointer + ;; + mov eax, [ecx + OFFSETOF__EEType__m_uBaseSize] + + ;; + ;; eax: base size + ;; ecx: EEType pointer + ;; edx: Thread pointer + ;; + + add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja AllocFailed + + ;; set the new alloc pointer + mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax + + ;; calc the new object pointer + sub eax, [ecx + OFFSETOF__EEType__m_uBaseSize] + + ;; set the new object's EEType pointer + mov [eax], ecx + ret + +AllocFailed: + + ;; + ;; SLOW PATH, call RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + ;; + ;; ecx: EEType pointer + ;; + push ebp + mov ebp, esp + + PUSH_COOP_PINVOKE_FRAME edx + + ;; Preserve EEType in ESI. + mov esi, ecx + + ;; Push alloc helper arguments + push edx ; transition frame + push dword ptr [ecx + OFFSETOF__EEType__m_uBaseSize] ; Size + xor edx, edx ; Flags + ;; Passing EEType in ecx + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ;; Set the new object's EEType pointer on success. + test eax, eax + jz NewFast_OOM + mov [eax + OFFSETOF__Object__m_pEEType], esi + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + mov edx, [esi + OFFSETOF__EEType__m_uBaseSize] + cmp edx, RH_LARGE_OBJECT_SIZE + jb NewFast_SkipPublish + mov ecx, eax ;; ecx: object + ;; edx: already contains object size + call RhpPublishObject ;; eax: this function returns the object that was passed-in +NewFast_SkipPublish: + + POP_COOP_PINVOKE_FRAME + + pop ebp + ret + +NewFast_OOM: + ;; This is the failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov eax, esi ; Preserve EEType pointer over POP_COOP_PINVOKE_FRAME + + POP_COOP_PINVOKE_FRAME + + ;; Cleanup our ebp frame + pop ebp + + mov ecx, eax ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +FASTCALL_ENDFUNC + +;; Allocate non-array object with finalizer. +;; ECX == EEType +FASTCALL_FUNC RhpNewFinalizable, 4 + ;; Create EBP frame. + push ebp + mov ebp, esp + + PUSH_COOP_PINVOKE_FRAME edx + + ;; Preserve EEType in ESI + mov esi, ecx + + ;; Push alloc helper arguments + push edx ; transition frame + push dword ptr [ecx + OFFSETOF__EEType__m_uBaseSize] ; Size + mov edx, GC_ALLOC_FINALIZE ; Flags + ;; Passing EEType in ecx + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ;; Set the new object's EEType pointer on success. + test eax, eax + jz NewFinalizable_OOM + mov [eax + OFFSETOF__Object__m_pEEType], esi + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + mov edx, [esi + OFFSETOF__EEType__m_uBaseSize] + cmp edx, RH_LARGE_OBJECT_SIZE + jb NewFinalizable_SkipPublish + mov ecx, eax ;; ecx: object + ;; edx: already contains object size + call RhpPublishObject ;; eax: this function returns the object that was passed-in +NewFinalizable_SkipPublish: + + POP_COOP_PINVOKE_FRAME + + ;; Collapse EBP frame and return + pop ebp + ret + +NewFinalizable_OOM: + ;; This is the failure path. 
We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov eax, esi ; Preserve EEType pointer over POP_COOP_PINVOKE_FRAME + + POP_COOP_PINVOKE_FRAME + + ;; Cleanup our ebp frame + pop ebp + + mov ecx, eax ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +FASTCALL_ENDFUNC + +;; Allocate a new string. +;; ECX == EEType +;; EDX == element count +FASTCALL_FUNC RhNewString, 8 + + push ecx + push edx + + ;; Make sure computing the aligned overall allocation size won't overflow + cmp edx, MAX_STRING_LENGTH + ja StringSizeOverflow + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + lea eax, [(edx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 3)] + and eax, -4 + + ; ECX == EEType + ; EAX == allocation size + ; EDX == scratch + + INLINE_GETTHREAD edx, ecx ; edx = GetThread(), TRASHES ecx + + ; ECX == scratch + ; EAX == allocation size + ; EDX == thread + + mov ecx, eax + add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc StringAllocContextOverflow + cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja StringAllocContextOverflow + + ; ECX == allocation size + ; EAX == new alloc ptr + ; EDX == thread + + ; set the new alloc pointer + mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax + + ; calc the new object pointer + sub eax, ecx + + pop edx + pop ecx + + ; set the new object's EEType pointer and element count + mov [eax + OFFSETOF__Object__m_pEEType], ecx + mov [eax + OFFSETOF__String__m_Length], edx + ret + +StringAllocContextOverflow: + ; ECX == string size + ; original ECX pushed + ; original EDX pushed + + ; Re-push original ECX + push [esp + 4] + + ; Create EBP frame. + mov [esp + 8], ebp + lea ebp, [esp + 8] + + PUSH_COOP_PINVOKE_FRAME edx + + ; Preserve the string size in edi + mov edi, ecx + + ; Get the EEType and put it in ecx. + mov ecx, dword ptr [ebp - 8] + + ; Push alloc helper arguments (thread, size, flags, EEType). + push edx ; transition frame + push edi ; Size + xor edx, edx ; Flags + ;; Passing EEType in ecx + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ; Set the new object's EEType pointer and length on success. + test eax, eax + jz StringOutOfMemoryWithFrame + + mov ecx, [ebp - 8] + mov edx, [ebp - 4] + mov [eax + OFFSETOF__Object__m_pEEType], ecx + mov [eax + OFFSETOF__String__m_Length], edx + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + cmp edi, RH_LARGE_OBJECT_SIZE + jb NewString_SkipPublish + mov ecx, eax ;; ecx: object + mov edx, edi ;; edx: object size + call RhpPublishObject ;; eax: this function returns the object that was passed-in +NewString_SkipPublish: + + POP_COOP_PINVOKE_FRAME + add esp, 8 ; pop ecx / edx + pop ebp + ret + +StringOutOfMemoryWithFrame: + ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ; an out of memory exception that the caller of this allocator understands. + + mov eax, [ebp - 8] ; Preserve EEType pointer over POP_COOP_PINVOKE_FRAME + + POP_COOP_PINVOKE_FRAME + add esp, 8 ; pop ecx / edx + pop ebp ; restore ebp + + mov ecx, eax ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. 
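+        ; RhExceptionHandling_FailedAllocation expects the EEType of the failed allocation
+        ; in ECX and a flag in EDX: 0 requests OutOfMemoryException, 1 requests
+        ; OverflowException (used by the array helpers below).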
+ jmp RhExceptionHandling_FailedAllocation + +StringSizeOverflow: + ;; We get here if the size of the final string object can't be represented as an unsigned + ;; 32-bit value. We're going to tail-call to a managed helper that will throw + ;; an OOM exception that the caller of this allocator understands. + + add esp, 8 ; pop ecx / edx + + ;; ecx holds EEType pointer already + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +FASTCALL_ENDFUNC + + +;; Allocate one dimensional, zero based array (SZARRAY). +;; ECX == EEType +;; EDX == element count +FASTCALL_FUNC RhpNewArray, 8 + + push ecx + push edx + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + ; if the element count is <= 0x10000, no overflow is possible because the component size is + ; <= 0xffff, and thus the product is <= 0xffff0000, and the base size for the worst case + ; (32 dimensional MdArray) is less than 0xffff. + movzx eax, word ptr [ecx + OFFSETOF__EEType__m_usComponentSize] + cmp edx,010000h + ja ArraySizeBig + mul edx + add eax, [ecx + OFFSETOF__EEType__m_uBaseSize] + add eax, 3 +ArrayAlignSize: + and eax, -4 + + ; ECX == EEType + ; EAX == array size + ; EDX == scratch + + INLINE_GETTHREAD edx, ecx ; edx = GetThread(), TRASHES ecx + + ; ECX == scratch + ; EAX == array size + ; EDX == thread + + mov ecx, eax + add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] + jc ArrayAllocContextOverflow + cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + ja ArrayAllocContextOverflow + + ; ECX == array size + ; EAX == new alloc ptr + ; EDX == thread + + ; set the new alloc pointer + mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax + + ; calc the new object pointer + sub eax, ecx + + pop edx + pop ecx + + ; set the new object's EEType pointer and element count + mov [eax + OFFSETOF__Object__m_pEEType], ecx + mov [eax + OFFSETOF__Array__m_Length], edx + ret + +ArraySizeBig: + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + ; if the element count is negative, it's an overflow, otherwise it's out of memory + cmp edx, 0 + jl ArraySizeOverflow + mul edx + jc ArrayOutOfMemoryNoFrame + add eax, [ecx + OFFSETOF__EEType__m_uBaseSize] + jc ArrayOutOfMemoryNoFrame + add eax, 3 + jc ArrayOutOfMemoryNoFrame + jmp ArrayAlignSize + +ArrayAllocContextOverflow: + ; ECX == array size + ; original ECX pushed + ; original EDX pushed + + ; Re-push original ECX + push [esp + 4] + + ; Create EBP frame. + mov [esp + 8], ebp + lea ebp, [esp + 8] + + PUSH_COOP_PINVOKE_FRAME edx + + ; Preserve the array size in edi + mov edi, ecx + + ; Get the EEType and put it in ecx. + mov ecx, dword ptr [ebp - 8] + + ; Push alloc helper arguments (thread, size, flags, EEType). + push edx ; transition frame + push edi ; Size + xor edx, edx ; Flags + ;; Passing EEType in ecx + + ;; void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + call RhpGcAlloc + + ; Set the new object's EEType pointer and length on success. 
+ test eax, eax + jz ArrayOutOfMemoryWithFrame + + mov ecx, [ebp - 8] + mov edx, [ebp - 4] + mov [eax + OFFSETOF__Object__m_pEEType], ecx + mov [eax + OFFSETOF__Array__m_Length], edx + + ;; If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + cmp edi, RH_LARGE_OBJECT_SIZE + jb NewArray_SkipPublish + mov ecx, eax ;; ecx: object + mov edx, edi ;; edx: object size + call RhpPublishObject ;; eax: this function returns the object that was passed-in +NewArray_SkipPublish: + + POP_COOP_PINVOKE_FRAME + add esp, 8 ; pop ecx / edx + pop ebp + ret + +ArrayOutOfMemoryWithFrame: + ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ; an out of memory exception that the caller of this allocator understands. + + mov eax, [ebp - 8] ; Preserve EEType pointer over POP_COOP_PINVOKE_FRAME + + POP_COOP_PINVOKE_FRAME + add esp, 8 ; pop ecx / edx + pop ebp ; restore ebp + + mov ecx, eax ; EEType pointer + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +ArrayOutOfMemoryNoFrame: + add esp, 8 ; pop ecx / edx + + ; ecx holds EEType pointer already + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +ArraySizeOverflow: + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. + + add esp, 8 ; pop ecx / edx + + ; ecx holds EEType pointer already + mov edx, 1 ; Indicate that we should throw OverflowException + jmp RhExceptionHandling_FailedAllocation + +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/AsmMacros.inc b/src/coreclr/src/nativeaot/Runtime/i386/AsmMacros.inc new file mode 100644 index 0000000000000..1df5744d8e354 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/AsmMacros.inc @@ -0,0 +1,218 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
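+
+;;
+;; Shared macros and constants for the i386 assembly helpers in this directory:
+;; calling-convention name decoration (FASTCALL_FUNC), inline thread access
+;; (INLINE_GETTHREAD), hijack removal, and the PInvoke transition frame push/pop.
+;;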
+ +include AsmOffsets.inc ; generated by the build from AsmOffsets.cpp + +;; +;; MACROS +;; + +FASTCALL_FUNC macro FuncName,cbArgs + FuncNameReal EQU @&FuncName&@&cbArgs + FuncNameReal proc public +endm + +FASTCALL_ENDFUNC macro + FuncNameReal endp +endm + +ALTERNATE_ENTRY macro Name + +decoratedName TEXTEQU @CatStr( _, Name ) ) + +decoratedName label proc +PUBLIC decoratedName + endm + +LABELED_RETURN_ADDRESS macro Name + +decoratedName TEXTEQU @CatStr( _, Name ) ) + +decoratedName label proc +PUBLIC decoratedName + endm + +EXPORT_POINTER_TO_ADDRESS macro Name + + local AddressToExport + +AddressToExport label proc + + .const + + align 4 + +Name dd offset AddressToExport + + public Name + + .code + + endm + +__tls_array equ 2Ch ;; offsetof(TEB, ThreadLocalStoragePointer) + +;; +;; __declspec(thread) version +;; +INLINE_GETTHREAD macro destReg, trashReg + ASSUME fs : NOTHING + EXTERN __tls_index : DWORD + EXTERN _tls_CurrentThread : DWORD + + mov destReg, [__tls_index] + mov trashReg, fs:[__tls_array] + mov destReg, [trashReg + destReg * 4] + add destReg, SECTIONREL _tls_CurrentThread +endm + + +INLINE_THREAD_UNHIJACK macro threadReg, trashReg1, trashReg2 + ;; + ;; Thread::Unhijack() + ;; + mov trashReg1, [threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress] + cmp trashReg1, 0 + je @F + + mov trashReg2, [threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + mov [trashReg2], trashReg1 + mov dword ptr [threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 + mov dword ptr [threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 + +@@: +endm + +;; +;; Macro used from unmanaged helpers called from managed code where the helper does not transition immediately +;; into pre-emptive mode but may cause a GC and thus requires the stack is crawlable. This is typically the +;; case for helpers that meddle in GC state (e.g. allocation helpers) where the code must remain in +;; cooperative mode since it handles object references and internal GC state directly but a garbage collection +;; may be inevitable. In these cases we need to be able to transition to pre-meptive mode deep within the +;; unmanaged code but still be able to initialize the stack iterator at the first stack frame which may hold +;; interesting GC references. In all our helper cases this corresponds to the most recent managed frame (e.g. +;; the helper's caller). +;; +;; This macro builds a frame describing the current state of managed code. +;; +;; The macro assumes it is called from a helper that has already set up an EBP frame and that the values of +;; EBX, ESI and EDI remain unchanged from their values in managed code. It pushes the frame at the top of the +;; stack. +;; +;; EAX is trashed by this macro. +;; +PUSH_COOP_PINVOKE_FRAME macro transitionFrameReg + lea eax, [ebp + 8] ; get the ESP of the caller + push eax ; save ESP + push edi + push esi + push ebx + push PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP + push eax ; Thread * (unused by stackwalker) + mov eax, [ebp + 0] ; Find previous EBP value + push eax ; save EBP + mov eax, [ebp + 4] ; Find the return address + push eax ; save m_RIP + + lea transitionFrameReg, [esp + 0] ; transitionFrameReg == address of frame +endm + +;; +;; Remove the frame from a previous call to PUSH_COOP_PINVOKE_FRAME from the top of the stack and restore EBX, +;; ESI and EDI to their previous values. 
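+;; The four slots skipped by the add are m_RIP, m_FramePointer, m_pThread and m_Flags;
+;; the final pop pulls the saved caller ESP into ECX, which is why ECX is trashed.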
+;; +;; TRASHES ECX +;; +POP_COOP_PINVOKE_FRAME macro + add esp, 4*4 + pop ebx + pop esi + pop edi + pop ecx +endm + + +;; +;; CONSTANTS -- INTEGER +;; +TSF_Attached equ 01h +TSF_SuppressGcStress equ 08h +TSF_DoNotTriggerGc equ 10h + +;; GC type flags +GC_ALLOC_FINALIZE equ 1 + +;; Note: these must match the defs in PInvokeTransitionFrameFlags +PTFF_SAVE_RBX equ 00000001h +PTFF_SAVE_RSI equ 00000002h +PTFF_SAVE_RDI equ 00000004h +PTFF_SAVE_ALL_PRESERVED equ 00000007h ;; NOTE: RBP is not included in this set! +PTFF_SAVE_RSP equ 00008000h +PTFF_SAVE_RAX equ 00000100h ;; RAX is saved if it contains a GC ref and we're in hijack handler +PTFF_SAVE_ALL_SCRATCH equ 00000700h +PTFF_RAX_IS_GCREF equ 00010000h ;; iff PTFF_SAVE_RAX: set -> eax is Object, clear -> eax is scalar +PTFF_RAX_IS_BYREF equ 00020000h ;; iff PTFF_SAVE_RAX: set -> eax is ByRef, clear -> eax is Object or scalar +PTFF_THREAD_ABORT equ 00040000h ;; indicates that ThreadAbortException should be thrown when returning from the transition + +;; These must match the TrapThreadsFlags enum +TrapThreadsFlags_None equ 0 +TrapThreadsFlags_AbortInProgress equ 1 +TrapThreadsFlags_TrapThreads equ 2 + +;; This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +STATUS_REDHAWK_THREAD_ABORT equ 43h + +;; +;; Rename fields of nested structs +;; +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + +;; +;; CONSTANTS -- SYMBOLS +;; + +RhDebugBreak equ @RhDebugBreak@0 +RhpGcAlloc equ @RhpGcAlloc@16 +G_LOWEST_ADDRESS equ _g_lowest_address +G_HIGHEST_ADDRESS equ _g_highest_address +G_EPHEMERAL_LOW equ _g_ephemeral_low +G_EPHEMERAL_HIGH equ _g_ephemeral_high +G_CARD_TABLE equ _g_card_table +RhpWaitForSuspend2 equ @RhpWaitForSuspend2@0 +RhpWaitForGC2 equ @RhpWaitForGC2@4 +RhpReversePInvokeAttachOrTrapThread2 equ @RhpReversePInvokeAttachOrTrapThread2@4 +RhpTrapThreads equ _RhpTrapThreads +RhpPublishObject equ @RhpPublishObject@8 + +ifdef FEATURE_GC_STRESS +THREAD__HIJACKFORGCSTRESS equ ?HijackForGcStress@Thread@@SGXPAUPAL_LIMITED_CONTEXT@@@Z +REDHAWKGCINTERFACE__STRESSGC equ ?StressGc@RedhawkGCInterface@@SGXXZ +endif ;; FEATURE_GC_STRESS + +;; +;; IMPORTS +;; +EXTERN RhpGcAlloc : PROC +EXTERN RhDebugBreak : PROC +EXTERN RhpWaitForSuspend2 : PROC +EXTERN RhpWaitForGC2 : PROC +EXTERN RhpReversePInvokeAttachOrTrapThread2 : PROC +EXTERN RhExceptionHandling_FailedAllocation : PROC +EXTERN RhpPublishObject : PROC +EXTERN RhpCalculateStackTraceWorker : PROC +EXTERN RhThrowHwEx : PROC +EXTERN RhThrowEx : PROC +EXTERN RhRethrow : PROC + +ifdef FEATURE_GC_STRESS +EXTERN THREAD__HIJACKFORGCSTRESS : PROC +EXTERN REDHAWKGCINTERFACE__STRESSGC : PROC +endif ;; FEATURE_GC_STRESS + +EXTERN G_LOWEST_ADDRESS : DWORD +EXTERN G_HIGHEST_ADDRESS : DWORD +EXTERN G_EPHEMERAL_LOW : DWORD +EXTERN G_EPHEMERAL_HIGH : DWORD +EXTERN G_CARD_TABLE : DWORD +EXTERN RhpTrapThreads : DWORD diff --git a/src/coreclr/src/nativeaot/Runtime/i386/AsmOffsetsCpu.h b/src/coreclr/src/nativeaot/Runtime/i386/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..a92f24d789b47 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/AsmOffsetsCpu.h @@ -0,0 +1,48 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
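+
+// Note: although these are the x86 offsets, the PAL_LIMITED_CONTEXT and REGDISPLAY field
+// names keep the Rxx spelling; on this architecture they hold ESP/EBP/EDI/ESI/EAX/EBX.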
+ +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. +// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(c0, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(4, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(8, ExInfo, m_exception) +PLAT_ASM_OFFSET(0c, ExInfo, m_kind) +PLAT_ASM_OFFSET(0d, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(10, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(14, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(bc, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(4, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(0c, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(a8, StackFrameIterator) +PLAT_ASM_OFFSET(08, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(0c, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(a4, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(1c, PAL_LIMITED_CONTEXT) +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_OFFSET(4, PAL_LIMITED_CONTEXT, Rsp) +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, Rbp) +PLAT_ASM_OFFSET(0c, PAL_LIMITED_CONTEXT, Rdi) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, Rsi) +PLAT_ASM_OFFSET(14, PAL_LIMITED_CONTEXT, Rax) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, Rbx) + +PLAT_ASM_SIZEOF(28, REGDISPLAY) +PLAT_ASM_OFFSET(1c, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(0c, REGDISPLAY, pRbx) +PLAT_ASM_OFFSET(10, REGDISPLAY, pRbp) +PLAT_ASM_OFFSET(14, REGDISPLAY, pRsi) +PLAT_ASM_OFFSET(18, REGDISPLAY, pRdi) diff --git a/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.S b/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.asm b/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.asm new file mode 100644 index 0000000000000..628aa4e131c11 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/CallDescrWorker.asm @@ -0,0 +1,96 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + +include AsmMacros.inc + +ifdef FEATURE_DYNAMIC_CODE +;;;;;;;;;;;;;;;;;;;;;;; CallingConventionConverter Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;------------------------------------------------------------------------------ +; This helper routine enregisters the appropriate arguments and makes the +; actual call. 
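+; It copies numStackSlots DWORDs of outgoing arguments from pSrc onto the stack (last
+; argument first), loads EDX and ECX from pArgumentRegisters, calls pTarget, and then
+; writes the result through pReturnBuffer according to fpReturnSize (0: EAX/EDX pair,
+; 4: float, 8: double).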
+;------------------------------------------------------------------------------ +; void __fastcall CallDescrWorker(CallDescrWorkerParams * pParams) +FASTCALL_FUNC RhCallDescrWorker, 4 + push ebp + mov ebp, esp + push ebx + mov ebx, ecx + + mov ecx, [ebx + OFFSETOF__CallDescrData__numStackSlots] + mov eax, [ebx + OFFSETOF__CallDescrData__pSrc] ; copy the stack + test ecx, ecx + jz donestack + lea eax, [eax + 4 * ecx - 4] ; last argument + push dword ptr [eax] + dec ecx + jz donestack + sub eax, 4 + push dword ptr [eax] + dec ecx + jz donestack +stackloop: + sub eax, 4 + push dword ptr [eax] + dec ecx + jnz stackloop +donestack: + + ; now we must push each field of the ArgumentRegister structure + mov eax, [ebx + OFFSETOF__CallDescrData__pArgumentRegisters] + mov edx, dword ptr [eax] + mov ecx, dword ptr [eax + 4] + mov eax,[ebx + OFFSETOF__CallDescrData__pTarget] + call eax + + EXPORT_POINTER_TO_ADDRESS _PointerToReturnFromCallDescrThunk + + ; Symbol used to identify thunk call to managed function so the special + ; case unwinder can unwind through this function. Sadly we cannot directly + ; export this symbol right now because it confuses DIA unwinder to believe + ; it's the beginning of a new method, therefore we export the address + ; by means of an auxiliary variable. + + ; Save FP return value if necessary + mov ecx, [ebx + OFFSETOF__CallDescrData__fpReturnSize] + cmp ecx, 0 + je ReturnsInt + + cmp ecx, 4 + je ReturnsFloat + cmp ecx, 8 + je ReturnsDouble + ; unexpected + jmp Epilog + +ReturnsInt: +; Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + mov ebx, [ebx + OFFSETOF__CallDescrData__pReturnBuffer] + mov [ebx], eax + mov [ebx + 4], edx + +Epilog: + pop ebx + pop ebp + retn + +ReturnsFloat: + mov ebx, [ebx + OFFSETOF__CallDescrData__pReturnBuffer] + fstp dword ptr [ebx] ; Spill the Float return value + jmp Epilog + +ReturnsDouble: + mov ebx, [ebx + OFFSETOF__CallDescrData__pReturnBuffer] + fstp qword ptr [ebx] ; Spill the Double return value + jmp Epilog + +FASTCALL_ENDFUNC + +endif + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.S b/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.asm b/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.asm new file mode 100644 index 0000000000000..3f596327345ef --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/CallingConventionConverterHelpers.asm @@ -0,0 +1,126 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +.586 +.model flat +option casemap:none +.code + +;; ----------------------------------------------------------------------------------------------------------- +;; standard macros +;; ----------------------------------------------------------------------------------------------------------- +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; struct ReturnBlock +;; { +;; 8 bytes of space +;; Used to hold return information. +;; eax, and 32bit float returns use the first 4 bytes, +;; eax,edx and 64bit float returns use the full 8 bytes +;; }; +;; + +ReturnInformation__ReturnData EQU 4h + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; ? CallingConventionConverter_ReturnVoidReturnThunk(int cbBytesOfStackToPop) +;; +LEAF_ENTRY CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + pop edx ; pop return address into edx + add esp,ecx ; remove ecx bytes from the call stack + push edx ; put the return address back on the stack + ret ; return to it (use a push/ret pair here so that the return stack buffer still works) +LEAF_END CallingConventionConverter_ReturnVoidReturnThunk, _TEXT + +;; +;; int CallingConventionConverter_ReturnIntegerReturnThunk(int cbBytesOfStackToPop, ReturnBlock*) +;; +LEAF_ENTRY CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + pop eax ; pop return address into edx + add esp,ecx ; remove ecx bytes from the call stack + push eax ; put the return address back on the stack + mov eax, [edx] ; setup eax and edx to hold the return value + mov edx, [edx + 4] + ret ; return (use a push/ret pair here so that the return stack buffer still works) +LEAF_END CallingConventionConverter_ReturnIntegerReturnThunk, _TEXT + +;; +;; float CallingConventionConverter_Return4ByteFloatReturnThunk(int cbBytesOfStackToPop, ReturnBlock*) +;; +LEAF_ENTRY CallingConventionConverter_Return4ByteFloatReturnThunk, _TEXT + pop eax ; pop return address into edx + add esp,ecx ; remove ecx bytes from the call stack + push eax ; put the return address back on the stack + fld dword ptr [edx]; fill in the return value + ret ; return (use a push/ret pair here so that the return stack buffer still works) +LEAF_END CallingConventionConverter_Return4ByteFloatReturnThunk, _TEXT + +;; +;; double CallingConventionConverter_Return4ByteFloatReturnThunk(int cbBytesOfStackToPop, ReturnBlock*) +;; +LEAF_ENTRY CallingConventionConverter_Return8ByteFloatReturnThunk, _TEXT + pop eax ; pop return address into edx + add esp,ecx ; remove ecx bytes from the call stack + push eax ; put the return address back on the stack + fld qword ptr [edx]; fill in the return value + ret ; return (use a push/ret pair here so that the return stack buffer still works) +LEAF_END CallingConventionConverter_Return8ByteFloatReturnThunk, _TEXT + +;; +;; Note: The "__jmpstub__" prefix is used to indicate to debugger +;; that it must step-through this stub when it encounters it while +;; stepping. +;; + +;; +;; __jmpstub__CallingConventionConverter_CommonCallingStub(?) 
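+;; EAX points at a two-slot stub descriptor: [EAX+0] is pushed as the first argument and
+;; [EAX+4] points to a pair holding the managed converter thunk (pushed next) and the
+;; UniversalTransitionThunk that the stub finally jumps to.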
+;; +LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + ;; rax <- stub info + push ebp + mov ebp, esp + push [eax] ; First argument + mov eax,[eax+4] ; + push [eax] ; Pointer to CallingConventionConverter Managed thunk + mov eax,[eax+4] ; Pointer to UniversalTransitionThunk + jmp eax +LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + + ;; + ;; void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr* commonCallingStub, IntPtr *return4ByteFloat, IntPtr *return8ByteFloat) + ;; +LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + lea eax, [CallingConventionConverter_ReturnVoidReturnThunk] + mov ecx, [esp+04h] + mov [ecx], eax + lea eax, [CallingConventionConverter_ReturnIntegerReturnThunk] + mov ecx, [esp+08h] + mov [ecx], eax + lea eax, [__jmpstub__CallingConventionConverter_CommonCallingStub] + mov ecx, [esp+0Ch] + mov [ecx], eax + lea eax, [CallingConventionConverter_Return4ByteFloatReturnThunk] + mov ecx, [esp+10h] + mov [ecx], eax + lea eax, [CallingConventionConverter_Return8ByteFloatReturnThunk] + mov ecx, [esp+14h] + mov [ecx], eax + retn 14h +LEAF_END CallingConventionConverter_GetStubs, _TEXT + + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/DivModHelpers.asm b/src/coreclr/src/nativeaot/Runtime/i386/DivModHelpers.asm new file mode 100644 index 0000000000000..edc292110e6ab --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/DivModHelpers.asm @@ -0,0 +1,256 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +EXTERN RhExceptionHandling_ThrowClasslibOverflowException : PROC +EXTERN RhExceptionHandling_ThrowClasslibDivideByZeroException : PROC +EXTERN __alldiv : PROC +EXTERN __allrem : PROC +EXTERN __aulldiv : PROC +EXTERN __aullrem : PROC +EXTERN __aulldvrm : PROC +EXTERN __alldvrm : PROC + +esp_offsetof_dividend_low equ 4 +esp_offsetof_dividend_high equ 8 +esp_offsetof_divisor_low equ 12 +esp_offsetof_divisor_high equ 16 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpLDiv +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: result low +;; EDX: result high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpLDiv, 16 + + ;; pretest for the problematic cases of overflow and divide by zero + ;; overflow: dividend = 0x80000000`00000000 and divisor = -1l = 0xffffffff`ffffffff + ;; divide by zero: divisor = 0x00000000`00000000 + ;; + ;; quick pretest - if the two halves of the divisor are unequal, we cannot + ;; have one of the problematic cases + mov eax,[esp+esp_offsetof_divisor_low] + cmp eax,[esp+esp_offsetof_divisor_high] + je LDivDoMoreTests +LDivOkToDivide: + ;; tailcall to the actual divide routine + jmp __alldiv +LDivDoMoreTests: + ;; we know the high and low halves of the divisor are equal + ;; + ;; check for the divide by zero case + test eax,eax + je ThrowClasslibDivideByZeroException + ;; + ;; is the divisor == -1l? I.e., can we have the overflow case? + cmp eax,-1 + jne LDivOkToDivide + ;; + ;; is the dividend == 0x80000000`00000000? 
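+    ;; (this is the only overflow case: INT64_MIN / -1 would be +2^63, which is not
+    ;; representable in a signed 64-bit result, so it is reported as an overflow instead)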
+ cmp dword ptr [esp+esp_offsetof_dividend_low],0 + jne LDivOkToDivide + cmp dword ptr [esp+esp_offsetof_dividend_high],80000000h + jne LDivOkToDivide +FASTCALL_ENDFUNC + + ;; make it look like the managed code called this directly + ;; by popping the parameters and putting the return address in the proper place +ThrowClasslibOverflowException proc + pop ecx + add esp,16 + push ecx + ;; passing return address in ecx + jmp RhExceptionHandling_ThrowClasslibOverflowException +ThrowClasslibOverflowException endp + +ThrowClasslibDivideByZeroException proc + pop ecx + add esp,16 + push ecx + ;; passing return address in ecx + jmp RhExceptionHandling_ThrowClasslibDivideByZeroException +ThrowClasslibDivideByZeroException endp + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpLMod +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: result low +;; EDX: result high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpLMod, 16 + + ;; pretest for the problematic cases of overflow and divide by zero + ;; overflow: dividend = 0x80000000`00000000 and divisor = -1l = 0xffffffff`ffffffff + ;; divide by zero: divisor = 0x00000000`00000000 + ;; + ;; quick pretest - if the two halves of the divisor are unequal, we cannot + ;; have one of the problematic cases + mov eax,[esp+esp_offsetof_divisor_low] + cmp eax,[esp+esp_offsetof_divisor_high] + je LModDoMoreTests +LModOkToDivide: + jmp __allrem +LModDoMoreTests: + ;; we know the high and low halves of the divisor are equal + ;; + ;; check for the divide by zero case + test eax,eax + je ThrowClasslibDivideByZeroException + ;; + ;; is the divisor == -1l? I.e., can we have the overflow case? + cmp eax,-1 + jne LModOkToDivide + ;; + ;; is the dividend == 0x80000000`00000000? + cmp dword ptr [esp+esp_offsetof_dividend_low],0 + jne LModOkToDivide + cmp dword ptr [esp+esp_offsetof_dividend_high],80000000h + jne LModOkToDivide + jmp ThrowClasslibOverflowException + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpLDivMod +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: quotient low +;; EDX: quotient high +;; ECX: remainder high +;; EBX: remainder high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpLDivMod, 16 + + ;; pretest for the problematic cases of overflow and divide by zero + ;; overflow: dividend = 0x80000000`00000000 and divisor = -1l = 0xffffffff`ffffffff + ;; divide by zero: divisor = 0x00000000`00000000 + ;; + ;; quick pretest - if the two halves of the divisor are unequal, we cannot + ;; have one of the problematic cases + mov eax,[esp+esp_offsetof_divisor_low] + cmp eax,[esp+esp_offsetof_divisor_high] + je LDivModDoMoreTests +LDivModOkToDivide: + jmp __alldvrm +LDivModDoMoreTests: + ;; we know the high and low halves of the divisor are equal + ;; + ;; check for the divide by zero case + test eax,eax + je ThrowClasslibDivideByZeroException + ;; + ;; is the divisor == -1l? I.e., can we have the overflow case? + cmp eax,-1 + jne LDivModOkToDivide + ;; + ;; is the dividend == 0x80000000`00000000? 
+ cmp dword ptr [esp+esp_offsetof_dividend_low],0 + jne LDivModOkToDivide + cmp dword ptr [esp+esp_offsetof_dividend_high],80000000h + jne LDivModOkToDivide + jmp ThrowClasslibOverflowException + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpULDiv +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: result low +;; EDX: result high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpULDiv, 16 + + ;; pretest for divide by zero + mov eax,[esp+esp_offsetof_divisor_low] + or eax,[esp+esp_offsetof_divisor_high] + jne __aulldiv + jmp ThrowClasslibDivideByZeroException + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpULMod +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: result low +;; EDX: result high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpULMod, 16 + + ;; pretest for divide by zero + mov eax,[esp+esp_offsetof_divisor_low] + or eax,[esp+esp_offsetof_divisor_high] + jne __aullrem + jmp ThrowClasslibDivideByZeroException + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpULDivMod +;; +;; INPUT: [ESP+4]: dividend low +;; [ESP+8]: dividend high +;; [ESP+12]: divisor low +;; [ESP+16]: divisor high +;; +;; OUTPUT: EAX: quotient low +;; EDX: quotient high +;; ECX: remainder high +;; EBX: remainder high +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpULDivMod, 16 + + ;; pretest for divide by zero + mov eax,[esp+esp_offsetof_divisor_low] + or eax,[esp+esp_offsetof_divisor_high] + jne __aulldvrm + jmp ThrowClasslibDivideByZeroException + +FASTCALL_ENDFUNC + + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.S b/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.asm b/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.asm new file mode 100644 index 0000000000000..500efdff66ac6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/ExceptionHandling.asm @@ -0,0 +1,480 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
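+
+;;
+;; Exception dispatch helpers. RhpThrowHwEx, RhpThrowEx and RhpRethrow build a
+;; PAL_LIMITED_CONTEXT and an ExInfo on the stack, link the ExInfo onto the thread, and
+;; call the managed RhThrowHwEx/RhThrowEx/RhRethrow implementation; the RhpCall*Funclet
+;; helpers then invoke catch/finally/filter funclets using registers reloaded from a
+;; REGDISPLAY.
+;;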
+ + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +RhpCallFunclet equ @RhpCallFunclet@0 +RhpThrowHwEx equ @RhpThrowHwEx@0 + +extern RhpCallFunclet : proc + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowHwEx +;; +;; INPUT: ECX: exception code of fault +;; EDX: faulting RIP +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpThrowHwEx, 0 + + esp_offsetof_ExInfo textequ %0 + esp_offsetof_Context textequ %SIZEOF__ExInfo + + push edx ; make it look like we were called by pushing the faulting IP like a return address + push ebp + mov ebp, esp + + lea eax, [esp+8] ;; calculate the RSP of the throw site + ;; edx already contains the throw site IP + +;; struct PAL_LIMITED_CONTEXT +;; { + push ebx + push eax + push esi + push edi + mov ebx, [ebp] + push ebx ;; 'faulting' Rbp + push eax ;; 'faulting' Rsp + push edx ;; 'faulting' IP +;; }; + + sub esp, SIZEOF__ExInfo + + INLINE_GETTHREAD eax, edx ;; eax <- thread, edx <- trashed + + lea edx, [esp + esp_offsetof_ExInfo] ;; edx <- ExInfo* + + xor esi, esi + mov [edx + OFFSETOF__ExInfo__m_exception], esi ;; init the exception object to null + mov byte ptr [edx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [edx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [edx + OFFSETOF__ExInfo__m_kind], 2 ;; ExKind.HardwareFault + + ;; link the ExInfo into the thread's ExInfo chain + mov ebx, [eax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [edx + OFFSETOF__ExInfo__m_pPrevExInfo], ebx ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [eax + OFFSETOF__Thread__m_pExInfoStackHead], edx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea ebx, [esp + esp_offsetof_Context] ;; ebx <- PAL_LIMITED_CONTEXT* + mov [edx + OFFSETOF__ExInfo__m_pExContext], ebx ;; init ExInfo.m_pExContext + + ;; ecx still contains the exception code + ;; edx contains the address of the ExInfo + call RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpThrowHwEx2 + + ;; no return + int 3 + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpThrowEx +;; +;; INPUT: ECX: exception object +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpThrowEx, 0 + + esp_offsetof_ExInfo textequ %0 + esp_offsetof_Context textequ %SIZEOF__ExInfo + + push ebp + mov ebp, esp + + lea eax, [esp+8] ;; calculate the RSP of the throw site + mov edx, [esp+4] ;; get the throw site IP via the return address + +;; struct PAL_LIMITED_CONTEXT +;; { + push ebx + push eax + push esi + push edi + mov ebx, [ebp] + push ebx ;; 'faulting' Rbp + push eax ;; 'faulting' Rsp + push edx ;; 'faulting' IP +;; }; + + sub esp, SIZEOF__ExInfo + + ;; ------------------------- + + lea ebx, [eax-4] ;; ebx <- addr of return address + INLINE_GETTHREAD eax, edx ;; eax <- thread, edx <- trashed + + ;; There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + ;; address could have been hijacked when we were in that C# code and we must remove the hijack and + ;; reflect the correct return address in our exception context record. The other throw helpers don't + ;; need this because they cannot be tail-called from C#. 
+ + INLINE_THREAD_UNHIJACK eax, esi, edx ;; trashes esi, edx + + mov edx, [ebx] ;; edx <- return address + mov [esp + esp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP], edx ;; set 'faulting' IP after unhijack + + lea edx, [esp + esp_offsetof_ExInfo] ;; edx <- ExInfo* + + xor esi, esi + mov [edx + OFFSETOF__ExInfo__m_exception], esi ;; init the exception object to null + mov byte ptr [edx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [edx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [edx + OFFSETOF__ExInfo__m_kind], 1 ;; ExKind.Throw + + ;; link the ExInfo into the thread's ExInfo chain + mov ebx, [eax + OFFSETOF__Thread__m_pExInfoStackHead] + mov [edx + OFFSETOF__ExInfo__m_pPrevExInfo], ebx ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [eax + OFFSETOF__Thread__m_pExInfoStackHead], edx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea ebx, [esp + esp_offsetof_Context] ;; ebx <- PAL_LIMITED_CONTEXT* + mov [edx + OFFSETOF__ExInfo__m_pExContext], ebx ;; init ExInfo.m_pExContext + + ;; ecx still contains the exception object + ;; edx contains the address of the ExInfo + call RhThrowEx + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpThrowEx2 + + ;; no return + int 3 + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpRethrow() +;; +;; SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +;; +;; INPUT: +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpRethrow, 0 + + + esp_offsetof_ExInfo textequ %0 + esp_offsetof_Context textequ %SIZEOF__ExInfo + + push ebp + mov ebp, esp + + lea eax, [esp+8] ;; calculate the RSP of the throw site + mov edx, [esp+4] ;; get the throw site IP via the return address + +;; struct PAL_LIMITED_CONTEXT +;; { + push ebx + push eax + push esi + push edi + mov ebx, [ebp] + push ebx ;; 'faulting' Rbp + push eax ;; 'faulting' Rsp + push edx ;; 'faulting' IP +;; }; + + sub esp, SIZEOF__ExInfo + + ;; ------------------------- + + lea ebx, [eax-4] ;; ebx <- addr of return address + INLINE_GETTHREAD eax, edx ;; eax <- thread, edx <- trashed + + lea edx, [esp + esp_offsetof_ExInfo] ;; edx <- ExInfo* + + xor esi, esi + mov [edx + OFFSETOF__ExInfo__m_exception], esi ;; init the exception object to null + mov byte ptr [edx + OFFSETOF__ExInfo__m_passNumber], 1 ;; init to the first pass + mov dword ptr [edx + OFFSETOF__ExInfo__m_idxCurClause], 0FFFFFFFFh + mov byte ptr [edx + OFFSETOF__ExInfo__m_kind], 0 ;; init to a deterministic value (ExKind.None) + + ;; link the ExInfo into the thread's ExInfo chain + mov ecx, [eax + OFFSETOF__Thread__m_pExInfoStackHead] ;; ecx <- currently active ExInfo + mov [edx + OFFSETOF__ExInfo__m_pPrevExInfo], ecx ;; pExInfo->m_pPrevExInfo = m_pExInfoStackHead + mov [eax + OFFSETOF__Thread__m_pExInfoStackHead], edx ;; m_pExInfoStackHead = pExInfo + + ;; set the exception context field on the ExInfo + lea ebx, [esp + esp_offsetof_Context] ;; ebx <- PAL_LIMITED_CONTEXT* + mov [edx + OFFSETOF__ExInfo__m_pExContext], ebx ;; init ExInfo.m_pExContext + + ;; ecx contains the currently active ExInfo + ;; edx contains the address of the new ExInfo + call RhRethrow + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpRethrow2 + + ;; no return + int 3 + +FASTCALL_ENDFUNC + +;; +;; Prologue of all funclet calling helpers (RhpCallXXXXFunclet) 
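+;; Establishes an EBP frame, saves EBX/ESI/EDI (so the stack walker can recover the
+;; preserved registers), and reserves localsCount 4-byte scratch slots on the stack.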
+;; +FUNCLET_CALL_PROLOGUE macro localsCount + push ebp + mov ebp, esp + + push ebx ;; save preserved registers (for the stackwalker) + push esi ;; + push edi ;; + + stack_alloc_size = localsCount * 4 + + if stack_alloc_size ne 0 + sub esp, stack_alloc_size + endif +endm + +;; +;; Epilogue of all funclet calling helpers (RhpCallXXXXFunclet) +;; +FUNCLET_CALL_EPILOGUE macro + if stack_alloc_size ne 0 + add esp, stack_alloc_size + endif + pop edi + pop esi + pop ebx + pop ebp +endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +;; ExInfo* pExInfo) +;; +;; INPUT: ECX: exception object +;; EDX: handler funclet address +;; [ESP + 4]: REGDISPLAY* +;; [ESP + 8]: ExInfo* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpCallCatchFunclet, 0 + + FUNCLET_CALL_PROLOGUE 2 + + esp_offsetof_ResumeIP textequ %00h ;; [esp + 00h]: continuation address + esp_offsetof_is_handling_thread_abort textequ %04h ;; [esp + 04h]: set if we are handling ThreadAbortException + ;; [esp + 08h]: edi save + ;; [esp + 0ch]: esi save + ;; [esp + 10h]: ebx save + esp_offsetof_PrevEBP textequ %14h ;; [esp + 14h]: prev ebp + esp_offsetof_RetAddr textequ %18h ;; [esp + 18h]: return address + esp_offsetof_RegDisplay textequ %1ch ;; [esp + 1Ch]: REGDISPLAY* + esp_offsetof_ExInfo textequ %20h ;; [esp + 20h]: ExInfo* + + ;; Clear the DoNotTriggerGc state before calling out to our managed catch funclet. + INLINE_GETTHREAD eax, ebx ;; eax <- Thread*, ebx is trashed + lock and dword ptr [eax + OFFSETOF__Thread__m_ThreadStateFlags], NOT TSF_DoNotTriggerGc + + cmp ecx, [eax + OFFSETOF__Thread__m_threadAbortException] + setz byte ptr [esp + esp_offsetof_is_handling_thread_abort] + + mov edi, [esp + esp_offsetof_RegDisplay] ;; edi <- REGDISPLAY * + + mov eax, [edi + OFFSETOF__REGDISPLAY__pRbx] + mov ebx, [eax] + + mov eax, [edi + OFFSETOF__REGDISPLAY__pRbp] + mov eax, [eax] + push eax ; save the funclet's EBP value for later + + mov eax, [edi + OFFSETOF__REGDISPLAY__pRsi] + mov esi, [eax] + + mov eax, [edi + OFFSETOF__REGDISPLAY__pRdi] + mov edi, [eax] + + pop eax ; get the funclet's EBP value + + ;; ECX still contains the exception object + ;; EDX: funclet IP + ;; EAX: funclet EBP + call RhpCallFunclet + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpCallCatchFunclet2 + + ;; eax: resume IP + mov [esp + esp_offsetof_ResumeIP], eax ;; save for later + + INLINE_GETTHREAD edx, ecx ;; edx <- Thread*, trash ecx + + ;; We must unhijack the thread at this point because the section of stack where the hijack is applied + ;; may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
+ INLINE_THREAD_UNHIJACK edx, ecx, eax ;; Thread in edx, trashes ecx and eax + + mov ecx, [esp + esp_offsetof_ExInfo] ;; ecx <- current ExInfo * + mov eax, [esp + esp_offsetof_RegDisplay] ;; eax <- REGDISPLAY* + mov eax, [eax + OFFSETOF__REGDISPLAY__SP] ;; eax <- resume SP value + + @@: mov ecx, [ecx + OFFSETOF__ExInfo__m_pPrevExInfo] ;; ecx <- next ExInfo + cmp ecx, 0 + je @F ;; we're done if it's null + cmp ecx, eax + jl @B ;; keep looping if it's lower than the new SP + + @@: mov [edx + OFFSETOF__Thread__m_pExInfoStackHead], ecx ;; store the new head on the Thread + + test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress + jz @f + + ;; test if the exception handled by the catch was the ThreadAbortException + cmp byte ptr [esp + esp_offsetof_is_handling_thread_abort], 0 + je @f + + ;; RhpCallFunclet preserved our local EBP value, so let's fetch the correct one for the resume address + mov ecx, [esp + esp_offsetof_RegDisplay] ;; ecx <- REGDISPLAY * + mov ecx, [ecx + OFFSETOF__REGDISPLAY__pRbp] + mov ebp, [ecx] + + ;; It was the ThreadAbortException, so rethrow it + mov ecx, STATUS_REDHAWK_THREAD_ABORT + mov edx, [esp + esp_offsetof_ResumeIP] + mov esp, eax ;; reset the SP to resume SP value + jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception + + @@: + ;; RhpCallFunclet preserved our local EBP value, so let's fetch the correct one for the resume address + mov ecx, [esp + esp_offsetof_RegDisplay] ;; ecx <- REGDISPLAY * + mov ecx, [ecx + OFFSETOF__REGDISPLAY__pRbp] + mov ebp, [ecx] + + ;; reset ESP and jump to the continuation address + mov ecx, [esp + esp_offsetof_ResumeIP] + mov esp, eax + jmp ecx + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: ECX: handler funclet address +;; EDX: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpCallFinallyFunclet, 0 + + FUNCLET_CALL_PROLOGUE 0 + + push edx ;; save REGDISPLAY* + + ;; Clear the DoNotTriggerGc state before calling out to our managed catch funclet. 
+ INLINE_GETTHREAD eax, ebx ;; eax <- Thread*, ebx is trashed + lock and dword ptr [eax + OFFSETOF__Thread__m_ThreadStateFlags], NOT TSF_DoNotTriggerGc + + ;; + ;; load preserved registers for funclet + ;; + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRbx] + mov ebx, [eax] + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRsi] + mov esi, [eax] + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRdi] + mov edi, [eax] + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRbp] + mov eax, [eax] + mov edx, ecx + + ;; ECX: not used + ;; EDX: funclet IP + ;; EAX: funclet EBP + call RhpCallFunclet + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpCallFinallyFunclet2 + + pop edx ;; restore REGDISPLAY* + + ;; + ;; save preserved registers from funclet + ;; + mov eax, [edx + OFFSETOF__REGDISPLAY__pRbx] + mov [eax], ebx + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRsi] + mov [eax], esi + + mov eax, [edx + OFFSETOF__REGDISPLAY__pRdi] + mov [eax], edi + + INLINE_GETTHREAD eax, ebx ;; eax <- Thread*, ebx is trashed + lock or dword ptr [eax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + + FUNCLET_CALL_EPILOGUE + ret + +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +;; +;; INPUT: ECX: exception object +;; EDX: filter funclet address +;; [ESP + 4]: REGDISPLAY* +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpCallFilterFunclet, 0 + + FUNCLET_CALL_PROLOGUE 0 + + push edx ;; save filter funclet address + + ;; + ;; load preserved registers for funclet + ;; + mov edx, [ebp + 8] + mov eax, [edx + OFFSETOF__REGDISPLAY__pRbp] + mov eax, [eax] + + ;; ECX still contains exception object + ;; EAX contains the funclet EBP value + mov edx, [esp + 0] ;; reload filter funclet address + + call RhpCallFunclet + + EXPORT_POINTER_TO_ADDRESS _PointerToRhpCallFilterFunclet2 + + ;; EAX contains the result of the filter execution + mov edx, [ebp + 8] + + pop ecx ;; pop scratch slot + + FUNCLET_CALL_EPILOGUE + ret + +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/FloatingPoint.asm b/src/coreclr/src/nativeaot/Runtime/i386/FloatingPoint.asm new file mode 100644 index 0000000000000..30977c5f9cd0f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/FloatingPoint.asm @@ -0,0 +1,77 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
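+
+;;
+;; Floating point remainder helpers. RhpFltRemRev and RhpDblRemRev loop on FPREM until the
+;; FPU clears the C2 (incomplete reduction) status bit; the _SSE2 variants spill the XMM
+;; arguments to the stack, call the x87 implementations, and reload the result into XMM0.
+;;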
+ + .586 + .xmm + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +FASTCALL_FUNC RhpFltRemRev, 8 ; float dividend, float divisor + + fld dword ptr [esp+8] ; divisor + fld dword ptr [esp+4] ; dividend + +fremloop: + fprem + wait + fnstsw ax + wait + sahf + jp fremloop ; Continue while the FPU status bit C2 is set + + fxch st(1) ; swap, so divisor is on top and result is in st(1) + fstp st(0) ; Pop the divisor from the FP stack + + ret 8 + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpDblRemRev, 16 ; double dividend, double divisor + + fld qword ptr [esp+0Ch] + fld qword ptr [esp+4] + +fremloopd: + fprem + wait + fnstsw ax + wait + sahf + jp fremloopd ; Continue while the FPU status bit C2 is set + + fxch st(1) ; swap, so divisor is on top and result is in st(1) + fstp st(0) ; Pop the divisor from the FP stack + + ret 10h + +FASTCALL_ENDFUNC + + +FASTCALL_FUNC RhpFltRemRev_SSE2, 0 ; float dividend, float divisor + sub esp, 12 ;; 4 bytes of our stack, 8 bytes args + movd dword ptr [esp], xmm0 + movd dword ptr [esp+4], xmm1 + call @RhpFltRemRev@8 ;; pops 8 bytes of stack + fstp dword ptr [esp] + movd xmm0, dword ptr [esp] + add esp, 4 + ret +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpDblRemRev_SSE2, 0 ; float dividend, float divisor + sub esp, 24 ;; 8 bytes of our stack, 16 bytes args + movq qword ptr [esp], xmm0 + movq qword ptr [esp+8], xmm1 + call @RhpDblRemRev@16 ;; pops 16 bytes of stack + fstp qword ptr [esp] + movq xmm0, qword ptr [esp] + add esp, 8 + ret +FASTCALL_ENDFUNC + + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/GC.asm b/src/coreclr/src/nativeaot/Runtime/i386/GC.asm new file mode 100644 index 0000000000000..e34c980cc3cb4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/GC.asm @@ -0,0 +1,64 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; +;; Unmanaged helpers used by the managed System.GC class. +;; + + .586 + .model flat + option casemap:none + .code + +include AsmMacros.inc + +;; DWORD getcpuid(DWORD arg, unsigned char result[16]) + +FASTCALL_FUNC getcpuid, 8 + + push ebx + push esi + mov esi, edx + mov eax, ecx + xor ecx, ecx + cpuid + mov [esi+ 0], eax + mov [esi+ 4], ebx + mov [esi+ 8], ecx + mov [esi+12], edx + pop esi + pop ebx + + ret + +FASTCALL_ENDFUNC + +;; The following function uses Deterministic Cache Parameter leafs to crack the cache hierarchy information on Prescott & Above platforms. +;; This function takes 3 arguments: +;; Arg1 is an input to ECX. Used as index to specify which cache level to return infoformation on by CPUID. +;; Arg2 is an input to EAX. For deterministic code enumeration, we pass in 4H in arg2. +;; Arg3 is a pointer to the return buffer +;; No need to check whether or not CPUID is supported because we have already called CPUID with success to come here. + +;; DWORD getextcpuid(DWORD arg1, DWORD arg2, unsigned char result[16]) + +FASTCALL_FUNC getextcpuid, 12 + + push ebx + push esi + mov ecx, ecx + mov eax, edx + cpuid + mov esi, [esp + 12] + mov [esi+ 0], eax + mov [esi+ 4], ebx + mov [esi+ 8], ecx + mov [esi+12], edx + pop esi + pop ebx + + ret + +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/src/nativeaot/Runtime/i386/GcProbe.asm new file mode 100644 index 0000000000000..f8af02620a30d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/GcProbe.asm @@ -0,0 +1,556 @@ +;; Licensed to the .NET Foundation under one or more agreements. 
+;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .xmm + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +DEFAULT_PROBE_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP +PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH +PROBE_SAVE_FLAGS_RAX_IS_GCREF equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF +;; +;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this +;; thread if it finds it at an IP that isn't managed code. +;; +;; Register state on entry: +;; EDX: thread pointer +;; +;; Register state on exit: +;; No changes +;; +ClearHijackState macro + mov dword ptr [edx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 + mov dword ptr [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 +endm + +;; +;; The prolog for all GC suspension hijackes (normal and stress). Sets up an EBP frame, +;; fixes up the hijacked return address, and clears the hijack state. +;; +;; Register state on entry: +;; All registers correct for return to the original return address. +;; +;; Register state on exit: +;; EAX: not trashed or saved +;; EBP: new EBP frame with correct return address +;; ESP: points to saved scratch registers (ECX & EDX) +;; ECX: trashed +;; EDX: thread pointer +;; +HijackFixupProlog macro + push eax ; save a slot for the repaired return address + push ebp + mov ebp, esp + push ecx ; save scratch registers + push edx ; save scratch registers + + ;; edx <- GetThread(), TRASHES ecx + INLINE_GETTHREAD edx, ecx + + ;; + ;; Fix the stack by pushing the original return address + ;; + mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] + mov [ebp + 4], ecx + + ClearHijackState +endm + +;; +;; Epilog for the normal and GC stress hijack functions. Restores scratch registers +;; and returns to the original return address. +;; +;; Register state on entry: +;; ESP: points to saved scratch registers +;; EBP: ebp frame +;; ECX, EDX: trashed +;; All other registers correct for return to the original return address. +;; +;; Register state on exit: +;; All registers restored as they were when the hijack was first reached. +;; +HijackFixupEpilog macro + pop edx + pop ecx + pop ebp + ret +endm + +;; +;; Sets up a PInvokeTranstionFrame with room for all registers. 
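+;; The frame layout, from the new ESP upwards, is: m_RIP, m_FramePointer, Thread*, the
+;; register bitmask (m_Flags), then the saved EBX, ESI, EDI, caller ESP and EAX.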
+;; +;; Register state on entry: +;; EDX: thread pointer +;; BITMASK_REG_OR_VALUE: register bitmask, PTTR_SAVE_ALL_PRESERVED at a minimum +;; EBP: ebp frame setup with correct return address +;; ESP: points to saved scratch registers +;; +;; Register state on exit: +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; EBX: thread pointer +;; EAX: trashed +;; ESI, EDI, EBX, EAX all saved in the frame +;; +;; ECX is NOT trashed if BITMASK_REG_OR_VALUE is a literal value and not a register +;; +PushProbeFrame macro BITMASK_REG_OR_VALUE + push eax ; EAX + lea eax, [ebp + 8] ; get caller ESP + push eax ; ESP + push edi ; EDI + push esi ; ESI + push ebx ; EBX + push BITMASK_REG_OR_VALUE ; register bitmask +ifdef _DEBUG + mov eax, BITMASK_REG_OR_VALUE + and eax, DEFAULT_PROBE_SAVE_FLAGS + cmp eax, DEFAULT_PROBE_SAVE_FLAGS ; make sure we have at least the flags to match what the macro pushes + je @F + call RhDebugBreak +@@: +endif ;; _DEBUG + push edx ; Thread * + mov eax, [ebp + 0] ; find previous EBP value + push eax ; m_FramePointer + mov eax, [ebp + 4] ; get return address + push eax ; m_RIP + + mov ebx, edx ; save Thread pointer for later +endm + +;; +;; Pops off the PInvokeTransitionFrame setup in PushProbeFrame above, restoring all registers. +;; +;; Register state on entry: +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; +;; Register state on exit: +;; ESP: points to saved scratch registers, PInvokeTransitionFrame removed +;; EBX: restored +;; ESI: restored +;; EDI: restored +;; EAX: restored +;; +PopProbeFrame macro + add esp, 4*4h + pop ebx + pop esi + pop edi + pop eax ; discard ESP + pop eax +endm + +;; +;; Set the Thread state and wait for a GC to complete. +;; +;; Register state on entry: +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; EBX: thread pointer +;; EBP: EBP frame +;; +;; Register state on exit: +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; EBX: thread pointer +;; EBP: EBP frame +;; All other registers trashed +;; + +EXTERN _RhpWaitForGCNoAbort : PROC + +WaitForGCCompletion macro + test dword ptr [ebx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc + jnz @F + + mov ecx, esp + call _RhpWaitForGCNoAbort +@@: + +endm + +RhpThrowHwEx equ @RhpThrowHwEx@0 +extern RhpThrowHwEx : proc + +;; +;; Main worker for our GC probes. Do not call directly!! This assumes that HijackFixupProlog has been done. +;; Instead, go through RhpGcProbeHijack* or RhpGcStressHijack*. This waits for the +;; GC to complete then returns to the original return address. +;; +;; Register state on entry: +;; ECX: register bitmask +;; EDX: thread pointer +;; EBP: EBP frame +;; ESP: scratch registers pushed (ECX & EDX) +;; +;; Register state on exit: +;; All registers restored as they were when the hijack was first reached. 
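+;; If a thread abort was requested while this thread was suspended (PTFF_THREAD_ABORT set
+;; in the frame flags), the probe does not return normally: it raises
+;; STATUS_REDHAWK_THREAD_ABORT via RhpThrowHwEx at the hijacked return address instead.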
+;; +RhpGcProbe proc + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz SynchronousRendezVous + + HijackFixupEpilog + +SynchronousRendezVous: + PushProbeFrame ecx ; bitmask in ECX + + WaitForGCCompletion + + mov edx, [esp + OFFSETOF__PInvokeTransitionFrame__m_Flags] + ;; + ;; Restore preserved registers -- they may have been updated by GC + ;; + PopProbeFrame + + test edx, PTFF_THREAD_ABORT + jnz Abort + + HijackFixupEpilog +Abort: + mov ecx, STATUS_REDHAWK_THREAD_ABORT + pop edx + pop eax ;; ecx was pushed here, but we don't care for its value + pop ebp + pop edx ;; return address as exception RIP + jmp RhpThrowHwEx + +RhpGcProbe endp + +ifdef FEATURE_GC_STRESS +;; +;; Set the Thread state and invoke RedhawkGCInterface::StressGC(). +;; +;; Assumes EBX is the Thread pointer. +;; +;; Register state on entry: +;; EBX: thread pointer +;; EBP: EBP frame +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; +;; Register state on exit: +;; ESP: pointer to a PInvokeTransitionFrame on the stack +;; EBP: EBP frame +;; All other registers trashed +;; +StressGC macro + mov [ebx + OFFSETOF__Thread__m_pHackPInvokeTunnel], esp + call REDHAWKGCINTERFACE__STRESSGC +endm + +;; +;; Worker for our GC stress probes. Do not call directly!! +;; Instead, go through RhpGcStressHijack. This performs the GC Stress +;; work and returns to the original return address. +;; +;; Register state on entry: +;; EDX: thread pointer +;; ECX: register bitmask +;; EBP: EBP frame +;; ESP: scratch registers pushed (ECX and EDX) +;; +;; Register state on exit: +;; All registers restored as they were when the hijack was first reached. +;; +RhpGcStressProbe proc + PushProbeFrame ecx ; bitmask in ECX + + StressGC + + ;; + ;; Restore preserved registers -- they may have been updated by GC + ;; + PopProbeFrame + + HijackFixupEpilog + +RhpGcStressProbe endp + +endif ;; FEATURE_GC_STRESS + +FASTCALL_FUNC RhpGcProbeHijackScalar, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + jmp RhpGcProbe + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpGcProbeHijackObject, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF + jmp RhpGcProbe + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpGcProbeHijackByref, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + jmp RhpGcProbe + +FASTCALL_ENDFUNC + +ifdef FEATURE_GC_STRESS +FASTCALL_FUNC RhpGcStressHijackScalar, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + jmp RhpGcStressProbe + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpGcStressHijackObject, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF + jmp RhpGcStressProbe + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpGcStressHijackByref, 0 + + HijackFixupProlog + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + jmp RhpGcStressProbe + +FASTCALL_ENDFUNC + +FASTCALL_FUNC RhpHijackForGcStress, 0 + push ebp + mov ebp, esp + + ;; + ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the + ;; IP after the call to this helper. + ;; + + push edx + push ecx + push ebx + push eax + push esi + push edi + + mov eax, [ebp] + push eax ;; (caller) Ebp + lea eax, [ebp + 8] + push eax ;; Esp + mov eax, [ebp + 4] + push eax ;; Eip + + push esp ;; address of PAL_LIMITED_CONTEXT + call THREAD__HIJACKFORGCSTRESS + + ;; Note: we only restore the scratch registers here. No GC has occured, so restoring + ;; the callee saved ones is unnecessary. 
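+    ;; Discard the stack slots that do not need to be reloaded, then restore the scratch
+    ;; registers in the reverse order of the pushes at the top of this function.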
+ add esp, 14h + pop eax + pop ebx + pop ecx + pop edx + pop ebp + ret +FASTCALL_ENDFUNC +endif ;; FEATURE_GC_STRESS + +;; +;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH +;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing +;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of +;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the +;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be +;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the +;; handler in the caller. +;; +;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to +;; complete. There are also variants for GC stress. +;; +;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to +;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack +;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. +;; +;; Register state on entry: +;; EAX: handler address we want to jump to. +;; ECX: reference to the exception object. +;; EDX: what ESP should be after the return address and arg space are removed. +;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. +;; The stack still contains the return address and the arguments to the call. +;; +;; Register state on exit: +;; ESP: what it would be after a complete return to the caller. +;; +RTU_EH_JUMP_HELPER macro funcName, hijackFuncName, isStress, stressFuncName +FASTCALL_FUNC funcName, 0 + cmp [esp], hijackFuncName + je RhpGCProbeForEHJump + +IF isStress EQ 1 + cmp [esp], stressFuncName + je RhpGCStressProbeForEHJump +ENDIF + + ;; We are not hijacked, so we can return to the handler. + ;; We return to keep the call/return prediction balanced. + mov esp, edx ; The stack is now as if we have returned from the call. + push eax ; Push the handler as the return address. + ret + +FASTCALL_ENDFUNC +endm + + +;; We need an instance of the helper for each possible hijack function. The binder has enough +;; information to determine which one we need to use for any function. +RTU_EH_JUMP_HELPER RhpEHJumpScalar, @RhpGcProbeHijackScalar@0, 0, 0 +RTU_EH_JUMP_HELPER RhpEHJumpObject, @RhpGcProbeHijackObject@0, 0, 0 +RTU_EH_JUMP_HELPER RhpEHJumpByref, @RhpGcProbeHijackByref@0, 0, 0 +ifdef FEATURE_GC_STRESS +RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, @RhpGcProbeHijackScalar@0, 1, @RhpGcStressHijackScalar@0 +RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, @RhpGcProbeHijackObject@0, 1, @RhpGcStressHijackObject@0 +RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, @RhpGcProbeHijackByref@0, 1, @RhpGcStressHijackByref@0 +endif + +;; +;; Macro to setup our EBP frame and adjust the location of the EH object reference for EH jump probe funcs. +;; +;; Register state on entry: +;; EAX: handler address we want to jump to. +;; ECX: reference to the exception object. +;; EDX: scratch +;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. 
+;; The stack is as if we have returned from the call +;; +;; Register state on exit: +;; ESP: ebp frame +;; EBP: ebp frame setup with space reserved for the repaired return address +;; EAX: reference to the exception object +;; ECX: scratch +;; +EHJumpProbeProlog macro + push eax ; save a slot for the repaired return address + push ebp ; setup an ebp frame to keep the stack nicely crawlable + mov ebp, esp + push eax ; save the handler address so we can jump to it later + mov eax, ecx ; move the ex object reference into eax so we can report it +endm + +;; +;; Macro to re-adjust the location of the EH object reference, cleanup the EBP frame, and make the +;; final jump to the handler for EH jump probe funcs. +;; +;; Register state on entry: +;; EAX: reference to the exception object +;; ESP: ebp frame +;; EBP: ebp frame setup with the correct return (handler) address +;; ECX: scratch +;; EDX: scratch +;; +;; Register state on exit: +;; ESP: correct for return to the caller +;; EBP: previous ebp frame +;; ECX: reference to the exception object +;; EDX: trashed +;; +EHJumpProbeEpilog macro + mov ecx, eax ; Put the EX obj ref back into ecx for the handler. + pop eax ; Recover the handler address. + pop ebp ; Pop the ebp frame we setup. + pop edx ; Pop the original return address, which we do not need. + push eax ; Push the handler as the return address. + ret +endm + +;; +;; We are hijacked for a normal GC (not GC stress), so we need to unhijcak and wait for the GC to complete. +;; +;; Register state on entry: +;; EAX: handler address we want to jump to. +;; ECX: reference to the exception object. +;; EDX: what ESP should be after the return address and arg space are removed. +;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. +;; The stack is as if we have returned from the call +;; +;; Register state on exit: +;; ESP: correct for return to the caller +;; EBP: previous ebp frame +;; ECX: reference to the exception object +;; +RhpGCProbeForEHJump proc + mov esp, edx ; The stack is now as if we have returned from the call. + EHJumpProbeProlog + + ;; edx <- GetThread(), TRASHES ecx + INLINE_GETTHREAD edx, ecx + + ;; Fix the stack by pushing the original return address + mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] + mov [ebp + 4], ecx + + ClearHijackState + +ifdef _DEBUG + ;; + ;; If we get here, then we have been hijacked for a real GC, and our SyncState must + ;; reflect that we've been requested to synchronize. + + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz @F + + call RhDebugBreak +@@: +endif ;; _DEBUG + + + PushProbeFrame PROBE_SAVE_FLAGS_RAX_IS_GCREF + WaitForGCCompletion + PopProbeFrame + + EHJumpProbeEpilog + +RhpGCProbeForEHJump endp + +ifdef FEATURE_GC_STRESS +;; +;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. +;; +;; Register state on entry: +;; EAX: handler address we want to jump to. +;; ECX: reference to the exception object. +;; EDX: what ESP should be after the return address and arg space are removed. +;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. +;; The stack is as if we have returned from the call +;; +;; Register state on exit: +;; ESP: correct for return to the caller +;; EBP: previous ebp frame +;; ECX: reference to the exception object +;; +RhpGCStressProbeForEHJump proc + mov esp, edx ; The stack is now as if we have returned from the call. 
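+    ;; From here on this mirrors RhpGCProbeForEHJump above, except that it invokes the GC stress
+    ;; helper instead of waiting for a thread suspension to complete.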
+ EHJumpProbeProlog + + ;; edx <- GetThread(), TRASHES ecx + INLINE_GETTHREAD edx, ecx + + ;; Fix the stack by pushing the original return address + mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] + mov [ebp + 4], ecx + + ClearHijackState + + PushProbeFrame PROBE_SAVE_FLAGS_RAX_IS_GCREF + StressGC + PopProbeFrame + + EHJumpProbeEpilog + +RhpGCStressProbeForEHJump endp + +endif ;; FEATURE_GC_STRESS + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/GetThread.asm b/src/coreclr/src/nativeaot/Runtime/i386/GetThread.asm new file mode 100644 index 0000000000000..b330406b9a098 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/GetThread.asm @@ -0,0 +1,31 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpGetThread +;; +;; +;; INPUT: none +;; +;; OUTPUT: EAX: Thread pointer +;; +;; MUST PRESERVE ARGUMENT REGISTERS +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpGetThread, 0 + push ecx + INLINE_GETTHREAD eax, ecx ; eax dest, ecx trash + pop ecx + ret +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.S b/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.asm b/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.asm new file mode 100644 index 0000000000000..f9599b1b8666e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/Interlocked.asm @@ -0,0 +1,3 @@ +;; TODO: Implement + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.S b/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.asm b/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.asm new file mode 100644 index 0000000000000..6fee9002e86db --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/InteropThunksHelpers.asm @@ -0,0 +1,101 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
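+
+;; Interop thunk helpers. RhCommonStub is the common stub that interop thunks transfer to: it
+;; receives the address of the thunk's two-pointer data block in eax, stores the first pointer
+;; (the context) in the ThunkParamSlot thread-local and tail-jumps to the second pointer (the
+;; target). The target can later retrieve the context via RhGetCurrentThunkContext.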
+ + +.586 +.model flat +option casemap:none +.code + +;; ----------------------------------------------------------------------------------------------------------- +;; standard macros +;; ----------------------------------------------------------------------------------------------------------- +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +__tls_array equ 2Ch ;; offsetof(TEB, ThreadLocalStoragePointer) + +POINTER_SIZE equ 04h + +;; TLS variables +_TLS SEGMENT ALIAS(".tls$") + ThunkParamSlot DD 00000000H +_TLS ENDS + +ASSUME fs : NOTHING +EXTRN __tls_index:DWORD + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; RhCommonStub +;; +LEAF_ENTRY RhCommonStub, _TEXT + ;; There are arbitrary callers passing arguments with arbitrary signatures. + ;; Custom calling convention: + ;; eax: pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + + ;; Save context data into the ThunkParamSlot thread-local variable + ;; A pointer to the delegate and function pointer for open static delegate should have been saved in the thunk's context cell during thunk allocation + + ;; make some scratch regs + push ecx + push edx + + mov ecx, [__tls_index] + mov edx, fs:[__tls_array] + mov ecx, [edx + ecx * POINTER_SIZE] + + ;; eax = address of context cell in thunk's data + ;; ecx = base address of TLS data + ;; edx = trashed + + ;; store thunk address in thread static + mov edx, [eax] + mov eax, [eax + POINTER_SIZE] ;; eax <- target slot data + mov [ecx + OFFSET ThunkParamSlot], edx ;; ThunkParamSlot <- context slot data + + ;; restore the regs we used + pop edx + pop ecx + + ;; jump to the target + jmp eax +LEAF_END RhCommonStub, _TEXT + + +;; +;; IntPtr RhGetCommonStubAddress() +;; +LEAF_ENTRY RhGetCommonStubAddress, _TEXT + lea eax, [RhCommonStub] + ret +LEAF_END RhGetCommonStubAddress, _TEXT + + +;; +;; IntPtr RhGetCurrentThunkContext() +;; +LEAF_ENTRY RhGetCurrentThunkContext, _TEXT + mov ecx, [__tls_index] + mov edx, fs:[__tls_array] + mov ecx, [edx + ecx * POINTER_SIZE] + mov eax, [ecx + OFFSET ThunkParamSlot] ;; eax <- ThunkParamSlot + ret +LEAF_END RhGetCurrentThunkContext, _TEXT + + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/MemClrForGC.asm b/src/coreclr/src/nativeaot/Runtime/i386/MemClrForGC.asm new file mode 100644 index 0000000000000..cb23c1ce1625f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/MemClrForGC.asm @@ -0,0 +1,148 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
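+
+;; memclr_for_gc: zeroes a block of memory for the GC. Blocks of 8KB or less are cleared entirely
+;; with rep stos; for larger blocks the portion beyond the first 8KB is cleared with non-temporal
+;; stores (movnti) when the OS reports SSE2 support, so that large clears do not flush the cache.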
+ + .586 + .xmm + .model flat + option casemap:none + + +EXTERN _IsProcessorFeaturePresent@4 : PROC + +PF_XMMI64_INSTRUCTIONS_AVAILABLE equ 10 + + .data +canUseSSE2 db 0 + + .code + +_memclr_for_gc@8 proc public + +; x86 version + +; we get the following parameters +; ecx = destination address +; edx = size to clear + + push ebx + push edi + + xor eax, eax + + ; load destination + mov edi,[esp+8+4] + + ; load size + mov ebx,[esp+8+8] + + ; check alignment of destination + test edi,3 + jnz alignDest +alignDone: + ; now destination is dword aligned + + ; compute number of bytes to clear non-temporally + ; we wish to clear the first 8k or so with rep stos, + ; anything above that non-temporally + + xor edx,edx + cmp ebx,8*1024 + jbe noNonTempClear + + ; can we use SSE2 instructions? + cmp canUseSSE2,0 + js noNonTempClear + jz computeCanUseSSE2 + +computeNonTempClear: + + ; compute the number of bytes above 8k + ; and round down to a multiple of 64 + mov edx,ebx + sub edx,8*1024 + and edx,not 63 + + ; compute remaining size to clear temporally + sub ebx,edx + +noNonTempClear: + ; do the temporal clear + mov ecx,ebx + shr ecx,2 + rep stosd + + ; do the non-temporal clear + test edx,edx + jne nonTempClearLoop + +nonTempClearDone: + + ; clear any remaining bytes + mov ecx,ebx + and ecx,3 + rep stosb + + pop edi + pop ebx + ret 8 + + ; this is the infrequent case, hence out of line +nonTempClearLoop: + movnti [edi+ 0],eax + movnti [edi+ 4],eax + movnti [edi+ 8],eax + movnti [edi+12],eax + + movnti [edi+16],eax + movnti [edi+20],eax + movnti [edi+24],eax + movnti [edi+28],eax + + movnti [edi+32],eax + movnti [edi+36],eax + movnti [edi+40],eax + movnti [edi+44],eax + + movnti [edi+48],eax + movnti [edi+52],eax + movnti [edi+56],eax + movnti [edi+60],eax + + add edi,64 + sub edx,64 + ja nonTempClearLoop + jmp nonTempClearDone + +alignDest: + test ebx,ebx + je alignDone +alignLoop: + mov [edi],al + add edi,1 + sub ebx,1 + jz alignDone + test edi,3 + jnz alignLoop + jmp alignDone + +computeCanUseSSE2: + ; we are not using the sse2 register set, + ; just sse2 instructions (movnti), + ; thus we just ask the OS about the usability of the instructions + ; OS bugs about saving/restoring registers like in early versions + ; of Vista etc. in the WoW shouldn't matter + + push PF_XMMI64_INSTRUCTIONS_AVAILABLE + call _IsProcessorFeaturePresent@4 + mov ecx,eax + xor eax,eax ; reset eax to 0 + test ecx,ecx + mov canUseSSE2,1 + jne computeNonTempClear + mov canUseSSE2,-1 + xor edx,edx + jmp noNonTempClear + +_memclr_for_gc@8 endp + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.S b/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.S new file mode 100644 index 0000000000000..6dee0d452b0a4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.S @@ -0,0 +1,42 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include + +// *********************************************************************/ +// JIT_StackProbe +// +// Purpose: +// the helper will access ("probe") a word on each page of the stack +// starting with the page right beneath esp down to the one pointed to by eax. +// The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. +// The call to the helper will be emitted by JIT in the function prolog when large (larger than 0x3000 bytes) stack frame is required. 
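+//
+// Illustrative C-style sketch of the probe loop below (assumes 4K pages; the real code works
+// directly on esp and restores it from ebp afterwards):
+//
+//   char* p = (char*)(esp & ~(PAGE_SIZE - 1));   // lowest address on the last probed page
+//   do {
+//       p -= PAGE_SIZE;
+//       touch(p);                                // access one word on the page
+//   } while (p > (char*)eax);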
+// +// NOTE: this helper will modify a value of esp and must establish the frame pointer. +// NOTE: On Linux we must advance the stack pointer as we probe - it is not allowed to access 65535 bytes below esp. +// +#define PAGE_SIZE 0x1000 +NESTED_ENTRY JIT_StackProbe, _TEXT, NoHandler + // On entry: + // eax - the lowest address of the stack frame being allocated (i.e. [InitialSp - FrameSize]) + // + // NOTE: this helper will probe at least one page below the one pointed by esp. + PROLOG_BEG + PROLOG_END + + and esp, -PAGE_SIZE // esp points to the **lowest address** on the last probed page + // This is done to make the loop end condition simpler. + +LOCAL_LABEL(ProbeLoop): + sub esp, PAGE_SIZE // esp points to the lowest address of the **next page** to probe + test [esp], eax // esp points to the lowest address on the **last probed** page + cmp esp, eax + jg LOCAL_LABEL(ProbeLoop) // if esp > eax, then we need to probe at least one more page. + + EPILOG_BEG + mov esp, ebp + EPILOG_END + ret + +NESTED_END JIT_StackProbe, _TEXT diff --git a/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.asm b/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.asm new file mode 100644 index 0000000000000..ed06082e96896 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/MiscStubs.asm @@ -0,0 +1,301 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + +include AsmMacros.inc + +EXTERN @GetClasslibCCtorCheck@4 : PROC +EXTERN _memcpy : PROC +EXTERN _memcpyGCRefs : PROC +EXTERN _memcpyGCRefsWithWriteBarrier : PROC +EXTERN _memcpyAnyWithWriteBarrier : PROC + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; eax : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers and the condition codes may be trashed. +;; +FASTCALL_FUNC RhpCheckCctor, 4 + + ;; Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + cmp dword ptr [eax + OFFSETOF__StaticClassConstructionContext__m_initialized], 1 + jne RhpCheckCctor__SlowPath + ret + +RhpCheckCctor__SlowPath: + mov edx, eax ; RhpCheckCctor2 takes the static class construction context pointer in the edx register + jmp @RhpCheckCctor2@4 +FASTCALL_ENDFUNC + +;; +;; Checks whether the static class constructor for the type indicated by the context structure has been +;; executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +;; execute the cctor and update the context to record this fact. +;; +;; Input: +;; eax : Value that must be preserved in this register across the cctor check. +;; edx : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than eax may be trashed and the condition codes may also be trashed. +;; +FASTCALL_FUNC RhpCheckCctor2, 4 + + ;; Check the m_initialized field of the context. 
The cctor has been run only if this equals 1 (the + ;; initial state is 0 and the remaining values are reserved for classlib use). This check is + ;; unsynchronized; if we go down the slow path and call the classlib then it is responsible for + ;; synchronizing with other threads and re-checking the value. + cmp dword ptr [edx + OFFSETOF__StaticClassConstructionContext__m_initialized], 1 + jne RhpCheckCctor2__SlowPath + ret + +;; Input: +;; eax : Value that must be preserved in this register across the cctor check. +;; edx : Address of StaticClassConstructionContext structure +;; +;; Output: +;; All volatile registers other than eax may be trashed and the condition codes may also be trashed. +;; +RhpCheckCctor2__SlowPath: + ;; Call a C++ helper to retrieve the address of the classlib callback. We need to preserve the context + ;; structure address in eax since it's needed for the actual call. + push ebx + push esi + mov ebx, edx ; save cctor context pointer + mov esi, eax ; save preserved return value + + ;; The caller's return address is passed as the argument to the helper; it's an address in the module + ;; and is used by the helper to locate the classlib. + mov ecx, [esp + 8] ; + 8 to skip past the saved ebx and esi + + call @GetClasslibCCtorCheck@4 + + ;; Eax now contains the address of the classlib method to call. The single argument is the context + ;; structure address currently in ebx. Clean up and tail call to the classlib callback so we're not on + ;; the stack should a GC occur (so we don't need to worry about transition frames). + mov edx, ebx + mov ecx, esi + pop esi + pop ebx + ;; Tail-call the classlib cctor check function. Note that the incoming eax value is moved to ecx + ;; and the classlib cctor check function is required to return that value, so that eax is preserved + ;; across a RhpCheckCctor call. + jmp eax + +FASTCALL_ENDFUNC + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* __cdecl RhpCopyMultibyteNoGCRefs(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; +_RhpCopyMultibyteNoGCRefs PROC PUBLIC + + ; #locals, num_params, prolog bytes, #regs saved, use ebp, frame type (0 == FRAME_FPO) + .FPO( 0, 3, 0, 0, 0, 0) + + ; [esp + 0] return address + ; [esp + 4] dest + ; [esp + 8] src + ; [esp + c] count + + cmp dword ptr [esp + 0Ch], 0 ; check for a zero-length copy + jz NothingToCopy + + mov ecx, [esp + 4] ; ecx <- dest + mov edx, [esp + 8] ; edx <- src + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. 
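+    ; Note that only the first byte of dest and src is probed here; the labels below mark the
+    ; exact instruction addresses the EH subsystem recognizes as these well-known AV locations.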
+ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation + cmp byte ptr [ecx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation + cmp byte ptr [edx], 0 + + ; tail-call to plain-old-memcpy + jmp _memcpy + +NothingToCopy: + mov eax, [esp + 4] ; return dest + ret + +_RhpCopyMultibyteNoGCRefs ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* __cdecl RhpCopyMultibyte(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; +_RhpCopyMultibyte PROC PUBLIC + + ; #locals, num_params, prolog bytes, #regs saved, use ebp, frame type (0 == FRAME_FPO) + .FPO( 0, 3, 0, 0, 0, 0) + + ; [esp + 0] return address + ; [esp + 4] dest + ; [esp + 8] src + ; [esp + c] count + + cmp dword ptr [esp + 0Ch], 0 ; check for a zero-length copy + jz NothingToCopy + + mov ecx, [esp + 4] ; ecx <- dest + mov edx, [esp + 8] ; edx <- src + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. +ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation + cmp byte ptr [ecx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation + cmp byte ptr [edx], 0 + + ; tail-call to the GC-safe memcpy implementation + ; NOTE: this is also a __cdecl function + jmp _memcpyGCRefs + +NothingToCopy: + mov eax, [esp + 4] ; return dest + ret + +_RhpCopyMultibyte ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* __cdecl RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy +;; +_RhpCopyMultibyteWithWriteBarrier PROC PUBLIC + + ; #locals, num_params, prolog bytes, #regs saved, use ebp, frame type (0 == FRAME_FPO) + .FPO( 0, 3, 0, 0, 0, 0) + + ; [esp + 0] return address + ; [esp + 4] dest + ; [esp + 8] src + ; [esp + c] count + + cmp dword ptr [esp + 0Ch], 0 ; check for a zero-length copy + jz NothingToCopy + + mov ecx, [esp + 4] ; ecx <- dest + mov edx, [esp + 8] ; edx <- src + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. 
+ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation + cmp byte ptr [ecx], 0 +ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation + cmp byte ptr [edx], 0 + + ; tail-call to the GC-safe memcpy implementation + ; NOTE: this is also a __cdecl function + jmp _memcpyGCRefsWithWriteBarrier + +NothingToCopy: + mov eax, [esp + 4] ; return dest + ret + +_RhpCopyMultibyteWithWriteBarrier ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; void* __cdecl RhpCopyAnyWithWriteBarrier(void*, void*, size_t) +;; +;; The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +;; the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +;; it to managed code. +;; Runs a card table update via RhpBulkWriteBarrier after the copy if it contained GC pointers +;; +_RhpCopyAnyWithWriteBarrier PROC PUBLIC + + ; #locals, num_params, prolog bytes, #regs saved, use ebp, frame type (0 == FRAME_FPO) + .FPO( 0, 3, 0, 0, 0, 0) + + ; [esp + 0] return address + ; [esp + 4] dest + ; [esp + 8] src + ; [esp + c] count + + cmp dword ptr [esp + 0Ch], 0 ; check for a zero-length copy + jz NothingToCopy + + mov ecx, [esp + 4] ; ecx <- dest + mov edx, [esp + 8] ; edx <- src + + ; Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + ; unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + ; translated to a managed exception as usual. +ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation + cmp byte ptr [ecx], 0 +ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation + cmp byte ptr [edx], 0 + + ; tail-call to the GC-safe memcpy implementation + ; NOTE: this is also a __cdecl function + jmp _memcpyAnyWithWriteBarrier + +NothingToCopy: + mov eax, [esp + 4] ; return dest + ret + +_RhpCopyAnyWithWriteBarrier ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; The following helper will access ("probe") a word on each page of the stack +; starting with the page right beneath esp down to the one pointed to by eax. +; The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. +; The call to the helper will be emitted by JIT in the function prolog when large (larger than 0x3000 bytes) stack frame is required. +; +; NOTE: this helper will modify a value of esp and must establish the frame pointer. +PAGE_SIZE equ 1000h + +_RhpStackProbe PROC public + ; On entry: + ; eax - the lowest address of the stack frame being allocated (i.e. [InitialSp - FrameSize]) + ; + ; NOTE: this helper will probe at least one page below the one pointed by esp. + push ebp + mov ebp, esp + + and esp, -PAGE_SIZE ; esp points to the **lowest address** on the last probed page + ; This is done to make the loop end condition simpler. +ProbeLoop: + sub esp, PAGE_SIZE ; esp points to the lowest address of the **next page** to probe + test [esp], eax ; esp points to the lowest address on the **last probed** page + cmp esp, eax + jg ProbeLoop ; if esp > eax, then we need to probe at least one more page. 
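+    ; Every page from the one just below the caller's esp down to the one containing eax has now
+    ; been probed; restore esp from the frame pointer and return.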
+ + mov esp, ebp + pop ebp + ret + +_RhpStackProbe ENDP + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.S b/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.asm b/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.asm new file mode 100644 index 0000000000000..947d3c1f26277 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/PInvoke.asm @@ -0,0 +1,219 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + +extern RhpReversePInvokeBadTransition : proc + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn +;; +;; +;; INPUT: none +;; +;; TRASHES: none +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +_RhpWaitForSuspend proc public + push ebp + mov ebp, esp + push eax + push ecx + push edx + + call RhpWaitForSuspend2 + + pop edx + pop ecx + pop eax + pop ebp + ret +_RhpWaitForSuspend endp + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGCNoAbort +;; +;; +;; INPUT: ECX: transition frame +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +_RhpWaitForGCNoAbort proc public + push ebp + mov ebp, esp + push eax + push edx + push ebx + push esi + + mov esi, [ecx + OFFSETOF__PInvokeTransitionFrame__m_pThread] + + test dword ptr [esi + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + jnz Done + + ; passing transition frame pointer in ecx + call RhpWaitForGC2 + +Done: + pop esi + pop ebx + pop edx + pop eax + pop ebp + ret +_RhpWaitForGCNoAbort endp + +RhpThrowHwEx equ @RhpThrowHwEx@0 +EXTERN RhpThrowHwEx : PROC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpWaitForGC +;; +;; +;; INPUT: ECX: transition frame +;; +;; OUTPUT: +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +_RhpWaitForGC proc public + push ebp + mov ebp, esp + push ebx + + mov ebx, ecx + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jz NoWait + + call _RhpWaitForGCNoAbort +NoWait: + test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress + jz Done + test dword ptr [ebx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT + jz Done + + mov ecx, STATUS_REDHAWK_THREAD_ABORT + pop ebx + pop ebp + pop edx ; return address as exception RIP + jmp RhpThrowHwEx ; Throw the ThreadAbortException as a special kind of hardware exception +Done: + pop ebx + pop ebp + ret +_RhpWaitForGC endp + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvoke +;; +;; IN: EAX: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 4: save slot for thread pointer to 
avoid re-calc in epilog sequence +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpReversePInvoke, 0 + push ecx ; save arg regs -- we could omit this if we knew the calling convention wasn't fastcall. + push edx ; ... + + ;; edx = GetThread(), TRASHES ecx + INLINE_GETTHREAD edx, ecx + mov [eax + 4], edx ; save thread pointer for RhpReversePInvokeReturn + + ; edx = thread + ; eax = prev save slot + ; ecx = scratch + + test dword ptr [edx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_Attached + jz AttachThread + +ThreadAttached: + ;; + ;; Check for the correct mode. This is accessible via various odd things that we cannot completely + ;; prevent such as : + ;; 1) Registering a reverse pinvoke entrypoint as a vectored exception handler + ;; 2) Performing a managed delegate invoke on a reverse pinvoke delegate. + ;; + cmp dword ptr [edx + OFFSETOF__Thread__m_pTransitionFrame], 0 + je CheckBadTransition + + ; Save previous TransitionFrame prior to making the mode transition so that it is always valid + ; whenever we might attempt to hijack this thread. + mov ecx, [edx + OFFSETOF__Thread__m_pTransitionFrame] + mov [eax], ecx + +ReverseRetry: + mov dword ptr [edx + OFFSETOF__Thread__m_pTransitionFrame], 0 + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz ReverseTrapReturningThread + +AllDone: + pop edx ; restore arg reg + pop ecx ; restore arg reg + ret + +CheckBadTransition: + ;; Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. This allows us to have + ;; [UnmanagedCallersOnly] methods that are called via the "restricted GC callouts" as well as from native, + ;; which is necessary because the methods are CCW vtable methods on interfaces passed to native. 
+ test dword ptr [edx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + jz BadTransition + + ;; zero-out our 'previous transition frame' save slot + mov dword ptr [eax], 0 + + ;; nothing more to do + jmp AllDone + +ReverseTrapReturningThread: + ;; put the previous frame back (sets us back to preemptive mode) + mov ecx, [eax] + mov [edx + OFFSETOF__Thread__m_pTransitionFrame], ecx + +AttachThread: + mov ecx, eax ; arg <- address of reverse pinvoke frame + call RhpReversePInvokeAttachOrTrapThread2 + jmp AllDone + +BadTransition: + pop edx + pop ecx + mov ecx, dword ptr [esp] ; arg <- return address + jmp RhpReversePInvokeBadTransition +FASTCALL_ENDFUNC + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; RhpReversePInvokeReturn +;; +;; IN: ECX: address of reverse pinvoke frame +;; 0: save slot for previous M->U transition frame +;; 4: save slot for thread pointer to avoid re-calc in epilog sequence +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +FASTCALL_FUNC RhpReversePInvokeReturn, 0 + push edx ; save return value + + mov edx, [ecx + 4] ; get Thread pointer + mov ecx, [ecx + 0] ; get previous M->U transition frame + + mov [edx + OFFSETOF__Thread__m_pTransitionFrame], ecx + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + pop edx ; restore return value + jnz _RhpWaitForSuspend + ret + +FASTCALL_ENDFUNC + + + end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.S b/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.asm b/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.asm new file mode 100644 index 0000000000000..8b1ba538eb169 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/StubDispatch.asm @@ -0,0 +1,133 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + + +include AsmMacros.inc + + +ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +EXTERN RhpCidResolve : PROC +EXTERN _RhpUniversalTransition_DebugStepTailCall@0 : PROC + + +;; Macro that generates code to check a single cache entry. +CHECK_CACHE_ENTRY macro entry +NextLabel textequ @CatStr( Attempt, %entry+1 ) + cmp ebx, [eax + (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 8))] + jne @F + pop ebx + jmp dword ptr [eax + (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 8) + 4)] +@@: +endm + + +;; Macro that generates a stub consuming a cache with the given number of entries. +DEFINE_INTERFACE_DISPATCH_STUB macro entries + +StubName textequ @CatStr( _RhpInterfaceDispatch, entries ) + + StubName proc public + + ;; Check the instance here to catch null references. We're going to touch it again below (to cache + ;; the EEType pointer), but that's after we've pushed ebx below, and taking an A/V there will + ;; mess up the stack trace for debugging. We also don't have a spare scratch register (eax holds + ;; the cache pointer and the push of ebx below is precisely so we can access a second register + ;; to hold the EEType pointer). 
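+    ;;
+    ;; Illustrative outline of the generated stub (C-style pseudocode; each 8-byte cache entry
+    ;; holds the expected type at offset 0 and the target code address at offset 4):
+    ;;
+    ;;   if (ecx == NULL) goto RhpInterfaceDispatchNullReference;
+    ;;   cache = cell->m_pCache;
+    ;;   type  = *(MethodTable**)ecx;
+    ;;   for (i = 0; i < entries; i++)
+    ;;       if (cache->m_rgEntries[i].type == type) goto cache->m_rgEntries[i].target;
+    ;;   goto RhpInterfaceDispatchSlow;   // with eax pointed back at the cell via m_pCell
+    ;;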
+ test ecx, ecx + je RhpInterfaceDispatchNullReference + + ;; eax currently contains the indirection cell address. We need to update it to point to the cache + ;; block instead. + mov eax, [eax + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Cache pointer is already loaded in the only scratch register we have so far, eax. We need + ;; another scratch register to hold the instance type so save the value of ebx and use that. + push ebx + + ;; Load the EEType from the object instance in ebx. + mov ebx, [ecx] + +CurrentEntry = 0 + while CurrentEntry lt entries + CHECK_CACHE_ENTRY %CurrentEntry +CurrentEntry = CurrentEntry + 1 + endm + + ;; eax currently contains the cache block. We need to point it back to the + ;; indirection cell using the back pointer in the cache block + mov eax, [eax + OFFSETOF__InterfaceDispatchCache__m_pCell] + pop ebx + jmp RhpInterfaceDispatchSlow + + StubName endp + + endm ;; DEFINE_INTERFACE_DISPATCH_STUB + + +;; Define all the stub routines we currently need. +DEFINE_INTERFACE_DISPATCH_STUB 1 +DEFINE_INTERFACE_DISPATCH_STUB 2 +DEFINE_INTERFACE_DISPATCH_STUB 4 +DEFINE_INTERFACE_DISPATCH_STUB 8 +DEFINE_INTERFACE_DISPATCH_STUB 16 +DEFINE_INTERFACE_DISPATCH_STUB 32 +DEFINE_INTERFACE_DISPATCH_STUB 64 + +;; Shared out of line helper used on cache misses. +RhpInterfaceDispatchSlow proc +;; eax points at InterfaceDispatchCell + + ;; Setup call to Universal Transition thunk + push ebp + mov ebp, esp + push eax ; First argument (Interface Dispatch Cell) + lea eax, [RhpCidResolve] + push eax ; Second argument (RhpCidResolve) + + ;; Jump to Universal Transition + jmp _RhpUniversalTransition_DebugStepTailCall@0 +RhpInterfaceDispatchSlow endp + +;; Out of line helper used when we try to interface dispatch on a null pointer. Sets up the stack so the +;; debugger gives a reasonable stack trace. +RhpInterfaceDispatchNullReference proc public + push ebp + mov ebp, esp + mov ebx, [ecx] ;; This should A/V + int 3 +RhpInterfaceDispatchNullReference endp + +;; Stub dispatch routine for dispatch to a vtable slot +_RhpVTableOffsetDispatch proc public + ;; eax currently contains the indirection cell address. We need to update it to point to the vtable offset (which is in the m_pCache field) + mov eax, [eax + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; add the vtable offset to the EEType pointer + add eax, [ecx] + + ;; Load the target address of the vtable into eax + mov eax, [eax] + + ;; tail-jump to the target + jmp eax +_RhpVTableOffsetDispatch endp + + +;; Initial dispatch on an interface when we don't have a cache yet. +_RhpInitialInterfaceDispatch proc public + ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + + jmp RhpInterfaceDispatchSlow + +_RhpInitialInterfaceDispatch endp + + +endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/ThunkPoolThunks.asm b/src/coreclr/src/nativeaot/Runtime/i386/ThunkPoolThunks.asm new file mode 100644 index 0000000000000..868f48837597f --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/ThunkPoolThunks.asm @@ -0,0 +1,297 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
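+
+;; Thunk pool stubs. The pool is laid out as pairs of 4K pages: a page of tiny thunk stubs whose
+;; matching data page is mapped immediately after it. Each stub computes the address of its own
+;; two-dword data block (context + target) in the following page and jumps through the common-stub
+;; pointer stored in the last dword of that data page.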
+ +.586 +.model flat +option casemap:none +.code + +include AsmMacros.inc + +;; ----------------------------------------------------------------------------------------------------------- +;; standard macros +;; ----------------------------------------------------------------------------------------------------------- +LEAF_ENTRY macro Name, Section + Section segment para 'CODE' + public Name + Name proc +endm + +NAMED_LEAF_ENTRY macro Name, Section, SectionAlias + Section segment para alias(SectionAlias) 'CODE' + public Name + Name proc +endm + +LEAF_END macro Name, Section + Name endp + Section ends +endm + +NAMED_READONLY_DATA_SECTION macro Section, SectionAlias + Section segment para alias(SectionAlias) read 'DATA' + DD 0 + Section ends +endm + +NAMED_READWRITE_DATA_SECTION macro Section, SectionAlias + Section segment para alias(SectionAlias) read write 'DATA' + DD 0 + Section ends +endm + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; STUBS & DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +THUNK_CODESIZE equ 20h ;; 5-byte call, 1 byte pop, 6-byte lea, 6-byte jmp, 14 bytes of padding +THUNK_DATASIZE equ 08h ;; 2 dwords + +THUNK_POOL_NUM_THUNKS_PER_PAGE equ 078h ;; 120 thunks per page + +PAGE_SIZE equ 01000h ;; 4K +POINTER_SIZE equ 04h + + +GET_CURRENT_IP macro + ALIGN 10h ;; make sure we align to 16-byte boundary for CFG table + call @F + @@: pop eax +endm + +LOAD_DATA_ADDRESS macro groupIndex, index + ;; start : eax points to current instruction of the current thunk + ;; set eax to begining of data page : eax <- [eax - (size of the call instruction + (THUNK_CODESIZE * current thunk's index)) + PAGE_SIZE] + ;; fix offset of the data : eax <- eax + (THUNK_DATASIZE * current thunk's index) + lea eax,[eax - (5 + groupIndex * THUNK_CODESIZE * 10 + THUNK_CODESIZE * index) + PAGE_SIZE + (groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * index)] +endm + +JUMP_TO_COMMON macro groupIndex, index + ;; start : eax points to current thunk's data block + ;; re-point eax to begining of data page : eax <- [eax - (THUNK_DATASIZE * current thunk's index)] + ;; jump to the location pointed at by the last dword in the data page : jump [eax + PAGE_SIZE - POINTER_SIZE] + jmp dword ptr[eax - (groupIndex * THUNK_DATASIZE * 10 + THUNK_DATASIZE * index) + PAGE_SIZE - POINTER_SIZE] +endm + +TenThunks macro groupIndex + ;; Each thunk will load the address of its corresponding data (from the page that immediately follows) + ;; and call a common stub. The address of the common stub is setup by the caller (last dword + ;; in the thunks data section) depending on the 'kind' of thunks needed (interop, fat function pointers, etc...) + + ;; Each data block used by a thunk consists of two dword values: + ;; - Context: some value given to the thunk as context (passed in eax). Example for fat-fptrs: context = generic dictionary + ;; - Target : target code that the thunk eventually jumps to. 
+ + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,0 + JUMP_TO_COMMON groupIndex,0 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,1 + JUMP_TO_COMMON groupIndex,1 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,2 + JUMP_TO_COMMON groupIndex,2 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,3 + JUMP_TO_COMMON groupIndex,3 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,4 + JUMP_TO_COMMON groupIndex,4 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,5 + JUMP_TO_COMMON groupIndex,5 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,6 + JUMP_TO_COMMON groupIndex,6 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,7 + JUMP_TO_COMMON groupIndex,7 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,8 + JUMP_TO_COMMON groupIndex,8 + + GET_CURRENT_IP + LOAD_DATA_ADDRESS groupIndex,9 + JUMP_TO_COMMON groupIndex,9 +endm + +THUNKS_PAGE_BLOCK macro + TenThunks 0 + TenThunks 1 + TenThunks 2 + TenThunks 3 + TenThunks 4 + TenThunks 5 + TenThunks 6 + TenThunks 7 + TenThunks 8 + TenThunks 9 + TenThunks 10 + TenThunks 11 +endm + +;; +;; The first thunks section should be 64K aligned because it can get +;; mapped multiple times in memory, and mapping works on allocation +;; granularity boundaries (we don't want to map more than what we need) +;; +;; The easiest way to do so is by having the thunks section at the +;; first 64K aligned virtual address in the binary. We provide a section +;; layout file to the linker to tell it how to layout the thunks sections +;; that we care about. (ndp\rh\src\runtime\DLLs\app\mrt100_app_sectionlayout.txt) +;; +;; The PE spec says images cannot have gaps between sections (other +;; than what is required by the section alignment value in the header), +;; therefore we need a couple of padding data sections (otherwise the +;; OS will not load the image). 
+;; + +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment0, ".pad0" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment1, ".pad1" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment2, ".pad2" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment3, ".pad3" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment4, ".pad4" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment5, ".pad5" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment6, ".pad6" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment7, ".pad7" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment8, ".pad8" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment9, ".pad9" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment10, ".pad10" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment11, ".pad11" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment12, ".pad12" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment13, ".pad13" +NAMED_READONLY_DATA_SECTION PaddingFor64KAlignment14, ".pad14" + +;; +;; Thunk Stubs +;; NOTE: Keep number of blocks in sync with macro/constant named 'NUM_THUNK_BLOCKS' in: +;; - ndp\FxCore\src\System.Private.CoreLib\System\Runtime\InteropServices\ThunkPool.cs +;; - ndp\rh\src\tools\rhbind\zapimage.h +;; +NAMED_LEAF_ENTRY ThunkPool, TKS0, ".tks0" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool, TKS0 + +NAMED_READWRITE_DATA_SECTION ThunkData0, ".tkd0" + +NAMED_LEAF_ENTRY ThunkPool1, TKS1, ".tks1" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool1, TKS1 + +NAMED_READWRITE_DATA_SECTION ThunkData1, ".tkd1" + +NAMED_LEAF_ENTRY ThunkPool2, TKS2, ".tks2" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool2, TKS2 + +NAMED_READWRITE_DATA_SECTION ThunkData2, ".tkd2" + +NAMED_LEAF_ENTRY ThunkPool3, TKS3, ".tks3" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool3, TKS3 + +NAMED_READWRITE_DATA_SECTION ThunkData3, ".tkd3" + +NAMED_LEAF_ENTRY ThunkPool4, TKS4, ".tks4" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool4, TKS4 + +NAMED_READWRITE_DATA_SECTION ThunkData4, ".tkd4" + +NAMED_LEAF_ENTRY ThunkPool5, TKS5, ".tks5" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool5, TKS5 + +NAMED_READWRITE_DATA_SECTION ThunkData5, ".tkd5" + +NAMED_LEAF_ENTRY ThunkPool6, TKS6, ".tks6" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool6, TKS6 + +NAMED_READWRITE_DATA_SECTION ThunkData6, ".tkd6" + +NAMED_LEAF_ENTRY ThunkPool7, TKS7, ".tks7" + THUNKS_PAGE_BLOCK +LEAF_END ThunkPool7, TKS7 + +NAMED_READWRITE_DATA_SECTION ThunkData7, ".tkd7" + + +;; +;; IntPtr RhpGetThunksBase() +;; +FASTCALL_FUNC RhpGetThunksBase, 0 + ;; Return the address of the first thunk pool to the caller (this is really the base address) + lea eax, [ThunkPool] + ret +FASTCALL_ENDFUNC + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; General Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; int RhpGetNumThunksPerBlock() +;; +FASTCALL_FUNC RhpGetNumThunksPerBlock, 0 + mov eax, THUNK_POOL_NUM_THUNKS_PER_PAGE + ret +FASTCALL_ENDFUNC + +;; +;; int RhpGetThunkSize() +;; +FASTCALL_FUNC RhpGetThunkSize, 0 + mov eax, THUNK_CODESIZE + ret +FASTCALL_ENDFUNC + +;; +;; int RhpGetNumThunkBlocksPerMapping() +;; +FASTCALL_FUNC RhpGetNumThunkBlocksPerMapping, 0 + mov eax, 8 + ret +FASTCALL_ENDFUNC + +;; +;; int RhpGetThunkBlockSize +;; +FASTCALL_FUNC RhpGetThunkBlockSize, 0 + mov eax, PAGE_SIZE * 2 + ret +FASTCALL_ENDFUNC + +;; +;; IntPtr RhpGetThunkDataBlockAddress(IntPtr thunkStubAddress) +;; +FASTCALL_FUNC RhpGetThunkDataBlockAddress, 4 + mov eax, ecx + mov ecx, PAGE_SIZE - 1 + not ecx + and eax, ecx + add eax, PAGE_SIZE + ret +FASTCALL_ENDFUNC + +;; +;; IntPtr RhpGetThunkStubsBlockAddress(IntPtr thunkDataAddress) +;; +FASTCALL_FUNC RhpGetThunkStubsBlockAddress, 4 
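+    ;; Round the data block address down to its page base, then step back one page to reach the
+    ;; corresponding thunk stubs page (stub and data pages are laid out in pairs).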
+ mov eax, ecx + mov ecx, PAGE_SIZE - 1 + not ecx + and eax, ecx + sub eax, PAGE_SIZE + ret +FASTCALL_ENDFUNC + + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.S b/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.asm b/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.asm new file mode 100644 index 0000000000000..b425c7d17d80c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/UniversalTransition.asm @@ -0,0 +1,101 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + +include AsmMacros.inc + +ifdef FEATURE_DYNAMIC_CODE + +;; +;; Defines an assembly thunk used to make a transition from managed code to a callee, +;; then (based on the return value from the callee), either returning or jumping to +;; a new location while preserving the input arguments. The usage of this thunk also +;; ensures arguments passed are properly reported. +;; +;; TODO: This code currently only tailcalls, and does not return. +;; +;; Inputs: +;; ecx, edx, stack space three pops down: arguments as normal +;; first register sized fields on the stack is the location of the target code +;; the UniversalTransitionThunk will call +;; second register sized field on the stack is the parameter to the target function +;; followed by the return address of the whole method. (This method cannot be called +;; via a call instruction, it must be jumped to.) The fake entrypoint is in place to +;; convince the stack walker this is a normal framed function. +;; +;; NOTE! FOR CORRECTNESS THIS FUNCTION REQUIRES THAT ALL NON-LEAF MANAGED FUNCTIONS HAVE +;; FRAME POINTERS, OR THE STACK WALKER CAN'T STACKWALK OUT OF HERE +;; + +; +; Frame layout is: +; +; {StackPassedArgs} ChildSP+018 CallerSP+000 +; {CallerRetaddr} ChildSP+014 CallerSP-004 +; {CallerEBP} ChildSP+010 CallerSP-008 +; {ReturnBlock (0x8 bytes)} ChildSP+008 CallerSP-010 +; -- On input (i.e., when control jumps to RhpUniversalTransition), the low 4 bytes of +; the ReturnBlock area holds the address of the callee and the high 4 bytes holds the +; extra argument to pass to the callee. +; {IntArgRegs (edx,ecx) (0x8 bytes)} ChildSP+000 CallerSP-018 +; {CalleeRetaddr} ChildSP-004 CallerSP-01c +; +; NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +; must be updated as well. +; +; NOTE: The callee receives a pointer to the base of the pushed IntArgRegs, and the callee +; has knowledge of the exact layout of the entire frame. +; +; NOTE: The stack walker guarantees that conservative GC reporting will be applied to +; everything between the base of the IntArgRegs and the top of the StackPassedArgs. +; + +UNIVERSAL_TRANSITION macro FunctionName + +FASTCALL_FUNC Rhp&FunctionName&_FAKE_ENTRY, 0 + ; Set up an ebp frame + push ebp + mov ebp, esp + push eax + push eax +ALTERNATE_ENTRY Rhp&FunctionName&@0 + push ecx + push edx + + ; + ; Call out to the target, while storing and reporting arguments to the GC. 
+ ; + mov eax, [ebp-8] ; Get the address of the callee + mov edx, [ebp-4] ; Get the extra argument to pass to the callee + lea ecx, [ebp-10h] ; Get pointer to edx value pushed above + call eax + + EXPORT_POINTER_TO_ADDRESS _PointerToReturnFrom&FunctionName + + ; We cannot make the label public as that tricks DIA stackwalker into thinking + ; it's the beginning of a method. For this reason we export an auxiliary variable + ; holding the address instead. + + pop edx + pop ecx + add esp, 8 + pop ebp + jmp eax + +FASTCALL_ENDFUNC + + endm + + ; To enable proper step-in behavior in the debugger, we need to have two instances + ; of the thunk. For the first one, the debugger steps into the call in the function, + ; for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + +endif + +end diff --git a/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.S b/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.S new file mode 100644 index 0000000000000..876f2dfbcb80d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.S @@ -0,0 +1,4 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// TODO: Implement diff --git a/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.asm b/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.asm new file mode 100644 index 0000000000000..0f6b3f1ba8ea5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/i386/WriteBarriers.asm @@ -0,0 +1,266 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +;; +;; Define the helpers used to implement the write barrier required when writing an object reference into a +;; location residing on the GC heap. Such write barriers allow the GC to optimize which objects in +;; non-ephemeral generations need to be scanned for references to ephemeral objects during an ephemeral +;; collection. +;; + + .xmm + .model flat + option casemap:none + .code + +include AsmMacros.inc + +;; Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used +;; during garbage collections to verify that object references where never written to the heap without using a +;; write barrier. Note that we're potentially racing to update the shadow heap while other threads are writing +;; new references to the real heap. Since this can't be solved perfectly without critical sections around the +;; entire update process, we instead update the shadow location and then re-check the real location (as two +;; ordered operations) and if there is a disparity we'll re-write the shadow location with a special value +;; (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +;; time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +;; shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. +ifdef WRITE_BARRIER_CHECK + +g_GCShadow TEXTEQU +g_GCShadowEnd TEXTEQU +INVALIDGCVALUE EQU 0CCCCCCCDh + +EXTERN g_GCShadow : DWORD +EXTERN g_GCShadowEnd : DWORD + +UPDATE_GC_SHADOW macro BASENAME, DESTREG, REFREG + + ;; If g_GCShadow is 0, don't perform the check. 
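+
+    ;; Illustrative outline (C-style pseudocode; the assembly below additionally preserves DESTREG
+    ;; across the computation):
+    ;;
+    ;;   if (g_GCShadow == 0) return;                      // shadow heap checking not enabled
+    ;;   if (dest < G_LOWEST_ADDRESS) return;              // dest is not in the tracked range
+    ;;   shadow = g_GCShadow + (dest - G_LOWEST_ADDRESS);
+    ;;   if (shadow > g_GCShadowEnd) return;
+    ;;   *shadow = ref;
+    ;;   if (*dest != ref)                                 // raced with another heap writer
+    ;;       *shadow = INVALIDGCVALUE;                     // disable checking for this slot
+    ;;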
+ cmp g_GCShadow, 0 + je &BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG& + + ;; Save DESTREG since we're about to modify it (and we need the original value both within the macro and + ;; once we exit the macro). + push DESTREG + + ;; Transform DESTREG into the equivalent address in the shadow heap. + sub DESTREG, G_LOWEST_ADDRESS + jb &BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG& + add DESTREG, [g_GCShadow] + cmp DESTREG, [g_GCShadowEnd] + ja &BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG& + + ;; Update the shadow heap. + mov [DESTREG], REFREG + + ;; Now check that the real heap location still contains the value we just wrote into the shadow heap. This + ;; read must be strongly ordered wrt to the previous write to prevent race conditions. We also need to + ;; recover the old value of DESTREG for the comparison so use an xchg instruction (which has an implicit lock + ;; prefix). + xchg [esp], DESTREG + cmp [DESTREG], REFREG + jne &BASENAME&_UpdateShadowHeap_Invalidate_&DESTREG&_&REFREG& + + ;; The original DESTREG value is now restored but the stack has a value (the shadow version of the + ;; location) pushed. Need to discard this push before we are done. + add esp, 4 + jmp &BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG& + +&BASENAME&_UpdateShadowHeap_Invalidate_&DESTREG&_&REFREG&: + ;; Someone went and updated the real heap. We need to invalidate the shadow location since we can't + ;; guarantee whose shadow update won. + + ;; Retrieve shadow location from the stack and restore original DESTREG to the stack. This is an + ;; additional memory barrier we don't require but it's on the rare path and x86 doesn't have an xchg + ;; variant that doesn't implicitly specify the lock prefix. + xchg [esp], DESTREG + mov dword ptr [DESTREG], INVALIDGCVALUE + +&BASENAME&_UpdateShadowHeap_PopThenDone_&DESTREG&_&REFREG&: + ;; Restore original DESTREG value from the stack. + pop DESTREG + +&BASENAME&_UpdateShadowHeap_Done_&DESTREG&_&REFREG&: +endm + +else ; WRITE_BARRIER_CHECK + +UPDATE_GC_SHADOW macro BASENAME, DESTREG, REFREG +endm + +endif ; WRITE_BARRIER_CHECK + +;; There are several different helpers used depending on which register holds the object reference. Since all +;; the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +;; name of the register that points to the location to be updated and the name of the register that holds the +;; object reference (this should be in upper case as it's used in the definition of the name of the helper). +DEFINE_WRITE_BARRIER macro DESTREG, REFREG + +;; Define a helper with a name of the form RhpAssignRefEAX etc. (along with suitable calling standard +;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into that +;; location is in one of the other general registers determined by the value of REFREG. +FASTCALL_FUNC RhpAssignRef&REFREG&, 0 + + ;; Export the canonical write barrier under unqualified name as well + ifidni , + @RhpAssignRef@0 label proc + PUBLIC @RhpAssignRef@0 + ALTERNATE_ENTRY RhpAssignRefAVLocation + endif + + ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here + ;; and the card table update we may perform below. + mov dword ptr [DESTREG], REFREG + + ;; Update the shadow copy of the heap with the same value (if enabled). 
+ UPDATE_GC_SHADOW RhpAssignRef, DESTREG, REFREG + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + cmp REFREG, [G_EPHEMERAL_LOW] + jb WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG& + cmp REFREG, [G_EPHEMERAL_HIGH] + jae WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG& + + ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must + ;; track this write. The location address is translated into an offset in the card table bitmap. We set + ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + ;; the byte if it hasn't already been done since writes are expensive and impact scaling. + shr DESTREG, 10 + add DESTREG, [G_CARD_TABLE] + cmp byte ptr [DESTREG], 0FFh + jne WriteBarrier_UpdateCardTable_&DESTREG&_&REFREG& + +WriteBarrier_NoBarrierRequired_&DESTREG&_&REFREG&: + ret + +;; We get here if it's necessary to update the card table. +WriteBarrier_UpdateCardTable_&DESTREG&_&REFREG&: + mov byte ptr [DESTREG], 0FFh + ret +FASTCALL_ENDFUNC +endm + +RET4 macro + ret 4 +endm + +DEFINE_CHECKED_WRITE_BARRIER_CORE macro BASENAME, DESTREG, REFREG, RETINST + + ;; The location being updated might not even lie in the GC heap (a handle or stack location for instance), + ;; in which case no write barrier is required. + cmp DESTREG, [G_LOWEST_ADDRESS] + jb &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& + cmp DESTREG, [G_HIGHEST_ADDRESS] + jae &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW BASENAME, DESTREG, REFREG + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + cmp REFREG, [G_EPHEMERAL_LOW] + jb &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& + cmp REFREG, [G_EPHEMERAL_HIGH] + jae &BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG& + + ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must + ;; track this write. The location address is translated into an offset in the card table bitmap. We set + ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + ;; the byte if it hasn't already been done since writes are expensive and impact scaling. + shr DESTREG, 10 + add DESTREG, [G_CARD_TABLE] + cmp byte ptr [DESTREG], 0FFh + jne &BASENAME&_UpdateCardTable_&DESTREG&_&REFREG& + +&BASENAME&_NoBarrierRequired_&DESTREG&_&REFREG&: + RETINST + +;; We get here if it's necessary to update the card table. +&BASENAME&_UpdateCardTable_&DESTREG&_&REFREG&: + mov byte ptr [DESTREG], 0FFh + RETINST + +endm + + +;; This macro is very much like the one above except that it generates a variant of the function which also +;; checks whether the destination is actually somewhere within the GC heap. +DEFINE_CHECKED_WRITE_BARRIER macro DESTREG, REFREG + +;; Define a helper with a name of the form RhpCheckedAssignRefEAX etc. (along with suitable calling standard +;; decoration). The location to be updated is in DESTREG. The object reference that will be assigned into +;; that location is in one of the other general registers determined by the value of REFREG. 
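+;;
+;; As a purely illustrative example of the card table arithmetic these barriers use (the numbers
+;; below are arbitrary and not taken from a real heap): because the destination address is shifted
+;; right by 10, each card byte covers a 1KB (400h byte) span of the heap. A store to destination
+;; address 12345678h therefore marks card byte [G_CARD_TABLE + 48D15h], which covers heap addresses
+;; 12345400h through 123457FFh.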
+ +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen on the first instruction +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +FASTCALL_FUNC RhpCheckedAssignRef&REFREG&, 0 + + ;; Export the canonical write barrier under unqualified name as well + ifidni , + @RhpCheckedAssignRef@0 label proc + PUBLIC @RhpCheckedAssignRef@0 + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + endif + + ;; Write the reference into the location. Note that we rely on the fact that no GC can occur between here + ;; and the card table update we may perform below. + mov dword ptr [DESTREG], REFREG + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedAssignRef, DESTREG, REFREG, ret + +FASTCALL_ENDFUNC + +endm + +;; One day we might have write barriers for all the possible argument registers but for now we have +;; just one write barrier that assumes the input register is EDX. +DEFINE_CHECKED_WRITE_BARRIER ECX, EDX +DEFINE_WRITE_BARRIER ECX, EDX + +;; Need some more write barriers to run CLR compiled MDIL on Redhawk - commented out for now +;; DEFINE_WRITE_BARRIER EDX, EAX +;; DEFINE_WRITE_BARRIER EDX, ECX +;; DEFINE_WRITE_BARRIER EDX, EBX +;; DEFINE_WRITE_BARRIER EDX, ESI +;; DEFINE_WRITE_BARRIER EDX, EDI +;; DEFINE_WRITE_BARRIER EDX, EBP + +;; DEFINE_CHECKED_WRITE_BARRIER EDX, EAX +;; DEFINE_CHECKED_WRITE_BARRIER EDX, ECX +;; DEFINE_CHECKED_WRITE_BARRIER EDX, EBX +;; DEFINE_CHECKED_WRITE_BARRIER EDX, ESI +;; DEFINE_CHECKED_WRITE_BARRIER EDX, EDI +;; DEFINE_CHECKED_WRITE_BARRIER EDX, EBP + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at @RhpCheckedLockCmpXchgAVLocation@0 +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +;; pass third argument in EAX +FASTCALL_FUNC RhpCheckedLockCmpXchg +ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + lock cmpxchg [ecx], edx + jne RhpCheckedLockCmpXchg_NoBarrierRequired_ECX_EDX + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedLockCmpXchg, ECX, EDX, ret + +FASTCALL_ENDFUNC + +;; WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +;; - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at @RhpCheckedXchgAVLocation@0 +;; - Function "UnwindWriteBarrierToCaller" assumes the stack contains just the pushed return address +FASTCALL_FUNC RhpCheckedXchg, 0 + + ;; Setup eax with the new object for the exchange, that way it will automatically hold the correct result + ;; afterwards and we can leave edx unaltered ready for the GC write barrier below. + mov eax, edx +ALTERNATE_ENTRY RhpCheckedXchgAVLocation + xchg [ecx], eax + + DEFINE_CHECKED_WRITE_BARRIER_CORE RhpCheckedXchg, ECX, EDX, ret + +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/src/nativeaot/Runtime/inc/CommonTypes.h b/src/coreclr/src/nativeaot/Runtime/inc/CommonTypes.h new file mode 100644 index 0000000000000..43da9a17d19a8 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/CommonTypes.h @@ -0,0 +1,53 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef __COMMON_TYPES_H__ +#define __COMMON_TYPES_H__ + +#include +#include +#include +#include + +using std::nothrow; +using std::size_t; +using std::uintptr_t; +using std::intptr_t; + +// +// These type names are chosen to match the C# types +// +typedef int8_t Int8; +typedef int16_t Int16; +typedef int32_t Int32; +typedef int64_t Int64; +typedef uint8_t UInt8; +typedef uint16_t UInt16; +typedef uint32_t UInt32; +typedef uint64_t UInt64; +typedef intptr_t IntNative; // intentional deviation from C# IntPtr +typedef uintptr_t UIntNative; // intentional deviation from C# UIntPtr +typedef wchar_t WCHAR; +typedef void * HANDLE; + +typedef unsigned char Boolean; +#define Boolean_false 0 +#define Boolean_true 1 + +typedef UInt32 UInt32_BOOL; // windows 4-byte BOOL, 0 -> false, everything else -> true +#define UInt32_FALSE 0 +#define UInt32_TRUE 1 + +#define UInt16_MAX ((UInt16)0xffffU) +#define UInt16_MIN ((UInt16)0x0000U) + +#define UInt32_MAX ((UInt32)0xffffffffU) +#define UInt32_MIN ((UInt32)0x00000000U) + +#define Int32_MAX ((Int32)0x7fffffff) +#define Int32_MIN ((Int32)0x80000000) + +#define UInt64_MAX ((UInt64)0xffffffffffffffffUL) +#define UInt64_MIN ((UInt64)0x0000000000000000UL) + +#endif // __COMMON_TYPES_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/DebugEvents.h b/src/coreclr/src/nativeaot/Runtime/inc/DebugEvents.h new file mode 100644 index 0000000000000..657a23c2d0009 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/DebugEvents.h @@ -0,0 +1,68 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// ----------------------------------------------------------------------------------------------------------- +// This defines the payload of debug events that are emited by Redhawk runtime and +// received by the debugger. These payloads are referenced by 1st chance SEH exceptions + + +// ----------------------------------------------------------------------------------------------------------- +// This version of holder does not have a default constructor. +#ifndef __DEBUG_EVENTS_H_ +#define __DEBUG_EVENTS_H_ + +// Special Exception code for RH to communicate to debugger +// RH will raise this exception to communicate managed debug events. +// Exception codes can't use bit 0x10000000, that's reserved by OS. +// NOTE: This is intentionally different than CLR's exception code (0x04242420) +// Perhaps it is because now we are in building 40? Who would know +#define CLRDBG_NOTIFICATION_EXCEPTION_CODE ((int) 0x04040400) + +// This is exception argument 0 included in debugger notification events. +// The debugger uses this as a sanity check. +// This could be very volatile data that changes between builds. +// NOTE: Again intentionally different than CLR's checksum (0x31415927) +// It doesn't have to be, but if anyone is manually looking at these +// exception payloads I am trying to make it obvious that they aren't +// the same. 
+#define CLRDBG_EXCEPTION_DATA_CHECKSUM ((int) 0x27182818) + +typedef enum +{ + DEBUG_EVENT_TYPE_INVALID = 0, + DEBUG_EVENT_TYPE_LOAD_MODULE = 1, + DEBUG_EVENT_TYPE_UNLOAD_MODULE = 2, + DEBUG_EVENT_TYPE_EXCEPTION_THROWN = 3, + DEBUG_EVENT_TYPE_EXCEPTION_FIRST_PASS_FRAME_ENTER = 4, + DEBUG_EVENT_TYPE_EXCEPTION_CATCH_HANDLER_FOUND = 5, + DEBUG_EVENT_TYPE_EXCEPTION_UNHANDLED = 6, + DEBUG_EVENT_TYPE_CUSTOM = 7, + DEBUG_EVENT_TYPE_MAX = 8 +} DebugEventType; + +typedef unsigned int ULONG32; + +struct DebugEventPayload +{ + DebugEventType type; + union + { + struct + { + CORDB_ADDRESS pModuleHeader; //ModuleHeader* + } ModuleLoadUnload; + struct + { + CORDB_ADDRESS ip; + CORDB_ADDRESS sp; + } Exception; + struct + { + CORDB_ADDRESS payload; + ULONG32 length; + } Custom; + }; +}; + + +#endif // __DEBUG_EVENTS_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/DebugMacrosExt.h b/src/coreclr/src/nativeaot/Runtime/inc/DebugMacrosExt.h new file mode 100644 index 0000000000000..54622f5a45c8a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/DebugMacrosExt.h @@ -0,0 +1,45 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +//***************************************************************************** +// DebugMacrosExt.h +// +// Simple debugging macros that take no dependencies on CLR headers. +// This header can be used from outside the CLR. +// +//***************************************************************************** + +#ifndef __DebugMacrosExt_h__ +#define __DebugMacrosExt_h__ + +#if !defined(_DEBUG_IMPL) && defined(_DEBUG) && !defined(DACCESS_COMPILE) +#define _DEBUG_IMPL 1 +#endif + +#ifdef _DEBUG +// A macro to execute a statement only in _DEBUG. +#define DEBUG_STMT(stmt) stmt +#define INDEBUG(x) x +#define INDEBUG_COMMA(x) x, +#define COMMA_INDEBUG(x) ,x +#define NOT_DEBUG(x) +#else +#define DEBUG_STMT(stmt) +#define INDEBUG(x) +#define INDEBUG_COMMA(x) +#define COMMA_INDEBUG(x) +#define NOT_DEBUG(x) x +#endif + + +#ifdef _DEBUG_IMPL +#define INDEBUGIMPL(x) x +#define INDEBUGIMPL_COMMA(x) x, +#define COMMA_INDEBUGIMPL(x) ,x +#else +#define INDEBUGIMPL(x) +#define INDEBUGIMPL_COMMA(x) +#define COMMA_INDEBUGIMPL(x) +#endif + + +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/inc/ModuleHeaders.h b/src/coreclr/src/nativeaot/Runtime/inc/ModuleHeaders.h new file mode 100644 index 0000000000000..83c8c608e8499 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/ModuleHeaders.h @@ -0,0 +1,64 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Please keep the data structures in this file in sync with the managed version at +// src/Common/src/Internal/Runtime/ModuleHeaders.cs +// + +struct ReadyToRunHeaderConstants +{ + static const uint32_t Signature = 0x00525452; // 'RTR' + + static const uint32_t CurrentMajorVersion = 4; + static const uint32_t CurrentMinorVersion = 0; +}; + +struct ReadyToRunHeader +{ + uint32_t Signature; // ReadyToRunHeaderConstants.Signature + uint16_t MajorVersion; + uint16_t MinorVersion; + + uint32_t Flags; + + uint16_t NumberOfSections; + uint8_t EntrySize; + uint8_t EntryType; + + // Array of sections follows. +}; + +// +// ReadyToRunSectionType IDs are used by the runtime to look up specific global data sections +// from each module linked into the final binary. 
New sections should be added at the bottom +// of the enum and deprecated sections should not be removed to preserve ID stability. +// +// Eventually this will be reconciled with ReadyToRunSectionType from +// https://github.com/dotnet/coreclr/blob/master/src/inc/readytorun.h +// +enum class ReadyToRunSectionType +{ + StringTable = 200, + GCStaticRegion = 201, + ThreadStaticRegion = 202, + InterfaceDispatchTable = 203, + TypeManagerIndirection = 204, + EagerCctor = 205, + FrozenObjectRegion = 206, + GCStaticDesc = 207, + ThreadStaticOffsetRegion = 208, + ThreadStaticGCDescRegion = 209, + ThreadStaticIndex = 210, + LoopHijackFlag = 211, + ImportAddressTables = 212, + + // Sections 300 - 399 are reserved for RhFindBlob backwards compatibility + ReadonlyBlobRegionStart = 300, + ReadonlyBlobRegionEnd = 399, +}; + +enum class ModuleInfoFlags +{ + HasEndPointer = 0x1, +}; diff --git a/src/coreclr/src/nativeaot/Runtime/inc/OptionalFieldDefinitions.h b/src/coreclr/src/nativeaot/Runtime/inc/OptionalFieldDefinitions.h new file mode 100644 index 0000000000000..a76c31e83496d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/OptionalFieldDefinitions.h @@ -0,0 +1,24 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is designed to be included multiple times with different definitions of the +// DEFINE_INLINE_OPTIONAL_FIELD macro in order to build data structures +// related to each type of EEType optional field we support (see OptionalFields.h for details). +// + +// The order of definition of the fields is somewhat important: for types that require multiple optional +// fields the fields are laid out in the order of definition. Thus access to the fields defined first will be +// slightly faster than the later fields. + +#ifndef DEFINE_INLINE_OPTIONAL_FIELD +#error Must define DEFINE_INLINE_OPTIONAL_FIELD before including this file +#endif + +// Field name Field type +DEFINE_INLINE_OPTIONAL_FIELD (RareFlags, UInt32) +DEFINE_INLINE_OPTIONAL_FIELD (DispatchMap, UInt32) +DEFINE_INLINE_OPTIONAL_FIELD (ValueTypeFieldPadding, UInt32) +DEFINE_INLINE_OPTIONAL_FIELD (NullableValueOffset, UInt8) + +#undef DEFINE_INLINE_OPTIONAL_FIELD diff --git a/src/coreclr/src/nativeaot/Runtime/inc/OptionalFields.h b/src/coreclr/src/nativeaot/Runtime/inc/OptionalFields.h new file mode 100644 index 0000000000000..e90c34490aad9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/OptionalFields.h @@ -0,0 +1,202 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Support for optional fields attached out-of-line to EETypes (or any other data structure for that matter). +// These should be used for attributes that exist for only a small subset of EETypes or are accessed only +// rarely. The idea is to avoid bloating the size of the most common EETypes and to move some of the colder +// data out-of-line to improve the density of the hot data. The basic idea is that the EEType contains a +// pointer to an OptionalFields structure (which may be NULL) and that structure contains a somewhat +// compressed version of the optional fields. +// +// For each OptionalFields instance we encode only the fields that are present so that the structure is as +// small as possible while retaining reasonable access costs. 
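+//
+// As a rough usage sketch (the pointer below is made up for illustration; it would typically come
+// from the owning EEType and may be NULL), a field is read through the typed accessors generated
+// at the bottom of this header, passing the value to fall back on when the field was not encoded
+// for that type:
+//
+//      OptionalFields * pOptionalFields = /* fetched from an EEType, may be NULL */;
+//      UInt32 rareFlags = pOptionalFields->GetRareFlags(/* defaultValue */ 0);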
+// +// This implies some tricky tradeoffs: +// * The more we compress the data the greater the access costs in terms of CPU. +// * More effective compression schemes tend to lead to the payload data being unaligned. This itself can +// result in overhead but on some architectures it's worse than that and the unaligned nature of the data +// requires special handling in client code. Generally it would be more robust and clean not to leak out +// such requirements to our callers. For small fields we can imagine copying the data into aligned storage +// (and indeed that might be a natural part of the decompression process). It might be more problematic for +// larger data items. +// +// In order to get the best of both worlds we employ a hybrid approach. Small values (typically single small +// integers) get encoded inline in a compressed format. Decoding them will automatically copy them into +// aligned storage. Larger values (such as complex data structures) will be stored out-of-line, naturally +// aligned and uncompressed (at least by this layer of the software). The entry in the optional field record +// will instead contain a reference to this out-of-line structure. +// +// Pointers are large (especially on 64-bit) and incur overhead in terms of base relocs and complexity (since +// the locations requiring relocs may not be aligned). To mitigate this we can encode references to these +// out-of-line records as deltas from a base address and by carefully ordering the layout of the out-of-line +// records we can share the same base address amongst multiple OptionalFields structures. +// +// Taking this to one end of the logical extreme we could store a single base address such as the module base +// address and encode all OptionalFields references as offsets from this; basically RVAs. This is cheap in the +// respect that we only need one base address (and associated reloc) but the majority of OptionalFields +// references will encode as fairly large deltas. As we'll touch on later our mechanism for compressing inline +// values in OptionalRecords is based on discarding insignificant leading zero bits; i.e. we encode small +// integers more effectively. So ideally we want to store multiple base addresses so we can lower the average +// encoding cost of the deltas. +// +// An additional concern is how these base addresses are located. Take the module base address example: we +// have no direct means of locating this based on an OptionalFields (or even the EEType that owns it). To +// obtain this value we're likely to have to perform some operation akin to a range lookup and there are +// interesting edge cases such as EETypes for generic types, which don't reside in modules. +// +// The approach taken here addresses several of the concerns above. The algorithm stores base addresses +// interleaved with the OptionalFields. They are located at well-known locations by aligning their addresses +// to a specific value (we can tune this but assume for the purposes of this explanation that the value is 64 +// bytes). This implies that the address requiring a base reloc is always aligned plus it can be located +// cheaply from an OptionalFields address by masking off the low-order bits of that address. +// +// As OptionalFields are added any out-of-line data they reference is stored linearly in the same order (this +// does imply that all out-of-line records must live in the same section and thus must have the same access +// attributes). 
This provides locality: adjacent OptionalFields may encode deltas to different out-of-line +// records but since the out-of-line records are adjacent (or nearly so) as well, both deltas will be about +// the same size. Once we've filled in the space between stored base addresses (some padding might be needed +// near the end where a full OptionalField won't fit, but this should be small given good compression of +// OptionalFields) then we write out a new base address. This is chosen based on the first out-of-line record +// referenced by the next OptionalField (i.e. it will make the first delta zero and keep the subsequent ones +// small). +// +// Consider the following example where for the sake of simplicity we assume each OptionalFields structure has +// precisely one out-of-line reference: +// +// +-----------------+ Out-of-line Records +// | Base Address |----------------------> +--------------------+ +// +-----------------+ | #1 | +// | OptionalFields | +--------------------+ +// | Record #1 | | #2 | +// | | | | +// +-----------------+ +--------------------+ +// | OptionalFields | | #3 | +// | Record #2 | /------------> +--------------------+ +// | | / | #4 | +// +-----------------+ / | | +// | OptionalFields | / | | +// | Record #3 | / +--------------------+ +// | | / | #5 | +// +-----------------+ / | | +// | Padding | / +--------------------+ +// +-----------------+ / : : +// | Base Address |- +// +-----------------+ +// | OptionalFields | +// | Record #4 | +// | | +// +-----------------+ +// | OptionalFields | +// | Record #5 | +// : : +// +// Each optional field uses the base address defined above it (at the lower memory address determined by +// masking off the alignment bits). No matter which out-of-line records they reference the deltas will be as +// small as we can make them. +// +// Lowering the alignment requirement introduces more base addresses and as a result also lowers the number of +// OptionalFields that share the same base address, leading to smaller encodings for out-of-line deltas. But +// at the same time it increases the number of pointers (and associated base relocs) that we must store. +// Additionally the compression of the deltas is not completely linear: certain ranges of delta magnitude will +// result in exactly the same storage being used when compressed. See the details of the delta encoding below +// to see how we can use this to our advantage when tuning the alignment of base addresses. +// +// We optimize the case where OptionalFields structs don't contain any out-of-line references. We collect +// those together and emit them in a single run with no interleaved base addresses. +// +// The OptionalFields record encoding itself is a byte stream representing one or more fields. The first byte +// is a field header: it contains a field type tag in the low-order 7 bits (giving us 128 possible field +// types) and the most significant bit indicates whether this is the last field of the structure. The field +// value (a 32-bit unsigned number) is encoded using the existing VarInt support which encodes the value in +// byte chunks taking between 1 and 5 bytes to do so. +// +// If the field value is out-of-line we decode the delta from the base address in much the same way as for +// inline field values. Before adding the delta to the base address, however, we scale it based on the natural +// alignment of the out-of-line data record it references. 
Since the out-of-line data is aligned on the same +// basis this scaling avoids encoding bits that will always be zero and thus allows us to reference a greater +// range of memory with a delta that encodes using less bytes. +// +// The value compression algorithm above gives us the non-linearity of compression referenced earlier. 32-bit +// values will encode in a given number of bytes based on the having a given number of significant +// (non-leading zero) bits: +// 5 bytes : 25 - 32 significant bits +// 4 bytes : 18 - 24 significant bits +// 3 bytes : 11 - 17 significant bits +// 2 bytes : 4 - 10 significant bits +// 1 byte : 0 - 3 significant bits +// +// We can use this to our advantage when choosing an alignment at which to store base addresses. Assuming that +// most out-of-line data will have an alignment requirement of at least 4 bytes we note that the 2 byte +// encoding already gives us an addressable range of 2^10 * 4 == 4KB which is likely to be enough for the vast +// majority of cases. That is we can raise the granularity of base addresses until the average amount of +// out-of-line data addressed begins to approach 4KB which lowers the cost of storing the base addresses while +// not impacting the encoding size of deltas at all (there's no point in storing base addresses more +// frequently because it won't make the encodings of deltas any smaller). +// +// Trying to tune for one byte deltas all the time is probably not worth it. The addressability range (again +// assuming 4 byte alignment) is only 32 bytes and unless we start storing a lot of small data structures +// out-of-line tuning for this will involve placing the base addresses very frequently and our costs will be +// dominated by the size of the base address pointers and their relocs. +// + +// Define enumeration of optional field tags. +enum OptionalFieldTag +{ +#define DEFINE_INLINE_OPTIONAL_FIELD(_name, _type) OFT_##_name, +#include "OptionalFieldDefinitions.h" + OFT_Count // Number of field types we support +}; + +// Array that indicates whether a given field type is inline (true) or out-of-line (false). +static bool g_rgOptionalFieldTypeIsInline[OFT_Count] = { +#define DEFINE_INLINE_OPTIONAL_FIELD(_name, _type) true, +#include "OptionalFieldDefinitions.h" +}; + +// Various random global constants we can tweak for performance tuning. +enum OptionalFieldConstants +{ + // Constants determining how often we interleave a "header" containing a base address for out-of-line + // records into the stream of OptionalFields structures. These will occur at some power of 2 alignment of + // memory address. The alignment must at least exceed that of a pointer (since we'll store a pointer in + // the header and we need room for at least one OptionalFields record between each header). As the + // alignment goes up we store less headers but may impose a larger one-time padding cost at the start of + // the optional fields memory block as well as increasing the average encoding size for out-of-line record + // deltas in each optional field record. + // + // Note that if you change these constants you must be sure to modify the alignment of the optional field + // virtual section in ZapImage.cpp as well as ensuring the alignment of the containing physical section is + // at least as high (this latter cases matters for the COFF output case only, when we're generating PE + // images directly the physical section will get page alignment). 
+ OFC_HeaderAlignmentShift = 7, + OFC_HeaderAlignmentBytes = 1 << OFC_HeaderAlignmentShift, + OFC_HeaderAlignmentMask = OFC_HeaderAlignmentBytes - 1, +}; + +typedef DPTR(class OptionalFields) PTR_OptionalFields; +typedef DPTR(PTR_OptionalFields) PTR_PTR_OptionalFields; + +class OptionalFields +{ +public: + // Define accessors for each field type. +#define DEFINE_INLINE_OPTIONAL_FIELD(_name, _type) \ + _type Get##_name(_type defaultValue) \ + { \ + return (_type)GetInlineField(OFT_##_name, (UInt32)defaultValue); \ + } + +#include "OptionalFieldDefinitions.h" + +private: + // Reads a field value (or the basis for an out-of-line record delta) starting from the first byte after + // the field header. Advances the field location to the start of the next field. + static OptionalFieldTag DecodeFieldTag(PTR_UInt8 * ppFields, bool *pfLastField); + + // Reads a field value (or the basis for an out-of-line record delta) starting from the first byte of a + // field description. Advances the field location to the start of the next field. + static UInt32 DecodeFieldValue(PTR_UInt8 * ppFields); + + UInt32 GetInlineField(OptionalFieldTag eTag, UInt32 uiDefaultValue); +}; diff --git a/src/coreclr/src/nativeaot/Runtime/inc/TargetPtrs.h b/src/coreclr/src/nativeaot/Runtime/inc/TargetPtrs.h new file mode 100644 index 0000000000000..846e5054727a4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/TargetPtrs.h @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#ifndef _TARGETPTRS_H_ +#define _TARGETPTRS_H_ + +typedef DPTR(class EEType) PTR_EEType; +typedef SPTR(struct StaticGcDesc) PTR_StaticGcDesc; + +#ifdef TARGET_AMD64 +typedef UInt64 UIntTarget; +#elif defined(TARGET_X86) +typedef UInt32 UIntTarget; +#elif defined(TARGET_ARM) +typedef UInt32 UIntTarget; +#elif defined(TARGET_ARM64) +typedef UInt64 UIntTarget; +#elif defined(TARGET_WASM) +typedef UInt32 UIntTarget; +#else +#error unexpected target architecture +#endif + +typedef PTR_UInt8 TgtPTR_UInt8; +typedef PTR_UInt32 TgtPTR_UInt32; +typedef void * TgtPTR_Void; +typedef PTR_EEType TgtPTR_EEType; +typedef class Thread * TgtPTR_Thread; +typedef struct CORINFO_Object * TgtPTR_CORINFO_Object; +typedef PTR_StaticGcDesc TgtPTR_StaticGcDesc; + +#endif // !_TARGETPTRS_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/daccess.h b/src/coreclr/src/nativeaot/Runtime/inc/daccess.h new file mode 100644 index 0000000000000..5f142ff056038 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/daccess.h @@ -0,0 +1,2387 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +//***************************************************************************** +// File: daccess.h +// +// Support for external access of runtime data structures. These +// macros and templates hide the details of pointer and data handling +// so that data structures and code can be compiled to work both +// in-process and through a special memory access layer. +// +// This code assumes the existence of two different pieces of code, +// the target, the runtime code that is going to be examined, and +// the host, the code that's doing the examining. Access to the +// target is abstracted so the target may be a live process on the +// same machine, a live process on a different machine, a dump file +// or whatever. No assumptions should be made about accessibility +// of the target. 
+// +// This code assumes that the data in the target is static. Any +// time the target's data changes, the interfaces must be reset so +// that potentially stale data is discarded. +// +// This code is intended for read access and there is no +// way to write data back currently. +// +// DAC-ized code: +// - is read-only (non-invasive). So DACized codepaths cannot trigger a GC. +// - has no Thread* object. In reality, DAC-ized codepaths are +// ReadProcessMemory calls from out-of-process. Conceptually, they +// are like a pure-native (preemptive) thread. +// +// This means that in particular, you cannot DACize a GCTRIGGERS function. +// Neither can you DACize a function that throws if doing so would involve +// allocating a new exception object. There may be +// exceptions to these rules if you can guarantee that the DACized +// part of the code path cannot cause a garbage collection (see +// EditAndContinueModule::ResolveField for an example). +// If you need to DACize a function that may trigger +// a GC, it is probably best to refactor the function so that the DACized +// part of the code path is in a separate function. For instance, +// functions with GetOrCreate() semantics are hard to DAC-ize because +// the Create portion is inherently invasive. Instead, consider refactoring +// into a GetOrFail() function that DAC can call, and then make GetOrCreate() +// a wrapper around that. + +// +// This code works by hiding the details of access to target memory. +// Access is divided into two types: +// 1. DPTR - access to a piece of data. +// 2. VPTR - access to a class with a vtable. The class can only have +// a single vtable pointer at the beginning of the class instance. +// Things only need to be declared as VPTRs when it is necessary to +// call virtual functions in the host. In that case the access layer +// must do extra work to provide a host vtable for the object when +// it is retrieved so that virtual functions can be called. +// +// When compiling with DACCESS_COMPILE the macros turn into templates +// which replace pointers with smart pointers that know how to fetch +// data from the target process and provide a host process version of it. +// Normal data structure access will transparently receive a host copy +// of the data and proceed, so code such as +// typedef DPTR(Class) PTR_Class; +// PTR_Class cls; +// int val = cls->m_Int; +// will work without modification. The appropriate operators are overloaded +// to provide transparent access, such as the -> operator in this case. +// Note that the convention is to create an appropriate typedef for +// each type that will be accessed. This hides the particular details +// of the type declaration and makes the usage look more like regular code. +// +// The ?PTR classes also have an implicit base type cast operator to +// produce a host-pointer instance of the given type. For example +// Class* cls = PTR_Class(addr); +// works by implicit conversion from the PTR_Class created by wrapping +// to a host-side Class instance. Again, this means that existing code +// can work without modification. +// +// Code Example: +// +// typedef struct _rangesection +// { +// PTR_IJitManager pjit; +// PTR_RangeSection pright; +// PTR_RangeSection pleft; +// ... Other fields omitted ...
+// } RangeSection; +// +// RangeSection* pRS = m_RangeTree; +// +// while (pRS != NULL) +// { +// if (currentPC < pRS->LowAddress) +// pRS=pRS->pleft; +// else if (currentPC > pRS->HighAddress) +// pRS=pRS->pright; +// else +// { +// return pRS->pjit; +// } +// } +// +// This code does not require any modifications. The global reference +// provided by m_RangeTree will be a host version of the RangeSection +// instantiated by conversion. The references to pRS->pleft and +// pRS->pright will refer to DPTRs due to the modified declaration. +// In the assignment statement the compiler will automatically use +// the implicit conversion from PTR_RangeSection to RangeSection*, +// causing a host instance to be created. Finally, if an appropriate +// section is found the use of pRS->pjit will cause an implicit +// conversion from PTR_IJitManager to IJitManager. The VPTR code +// will look at target memory to determine the actual derived class +// for the JitManager and instantiate the right class in the host so +// that host virtual functions can be used just as they would in +// the target. +// +// There are situations where code modifications are required, though. +// +// 1. Any time the actual value of an address matters, such as using +// it as a search key in a tree, the target address must be used. +// +// An example of this is the RangeSection tree used to locate JIT +// managers. A portion of this code is shown above. Each +// RangeSection node in the tree describes a range of addresses +// managed by the JitMan. These addresses are just being used as +// values, not to dereference through, so there are not DPTRs. When +// searching the range tree for an address the address used in the +// search must be a target address as that's what values are kept in +// the RangeSections. In the code shown above, currentPC must be a +// target address as the RangeSections in the tree are all target +// addresses. Use dac_cast to retrieve the target address +// of a ?PTR, as well as to convert a host address to the +// target address used to retrieve that particular instance. Do not +// use dac_cast with any raw target pointer types (such as BYTE*). +// +// 2. Any time an address is modified, such as by address arithmetic, +// the arithmetic must be performed on the target address. +// +// When a host instance is created it is created for the type in use. +// There is no particular relation to any other instance, so address +// arithmetic cannot be used to get from one instance to any other +// part of memory. For example +// char* Func(Class* cls) +// { +// // String follows the basic Class data. +// return (char*)(cls + 1); +// } +// does not work with external access because the Class* used would +// have retrieved only a Class worth of data. There is no string +// following the host instance. Instead, this code should use +// dac_cast to get the target address of the Class +// instance, add sizeof(*cls) and then create a new ?PTR to access +// the desired data. Note that the newly retrieved data will not +// be contiguous with the Class instance, so address arithmetic +// will still not work. 
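+//
+// A corrected version of the function above might look roughly like the following (PTR_READ is
+// described later in this header; the length cbString is assumed to be known to the caller and
+// its name is made up for the example):
+//
+//      char* Func(Class* cls)
+//      {
+//          // Recover the target address of the instance and step past it to the string data.
+//          TADDR strAddr = dac_cast<TADDR>(cls) + sizeof(*cls);
+//          // Marshal the string bytes into a fresh host-side copy.
+//          return (char*)PTR_READ(strAddr, cbString);
+//      }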
+// +// Previous Code: +// +// BOOL IsTarget(LPVOID ip) +// { +// StubCallInstrs* pStubCallInstrs = GetStubCallInstrs(); +// +// if (ip == (LPVOID) &(pStubCallInstrs->m_op)) +// { +// return TRUE; +// } +// +// Modified Code: +// +// BOOL IsTarget(LPVOID ip) +// { +// StubCallInstrs* pStubCallInstrs = GetStubCallInstrs(); +// +// if ((TADDR)ip == dac_cast(pStubCallInstrs) + +// (TADDR)offsetof(StubCallInstrs, m_op)) +// { +// return TRUE; +// } +// +// The parameter ip is a target address, so the host pStubCallInstrs +// cannot be used to derive an address from. The member & reference +// has to be replaced with a conversion from host to target address +// followed by explicit offsetting for the field. +// +// PTR_HOST_MEMBER_TADDR is a convenience macro that encapsulates +// these two operations, so the above code could also be: +// +// if ((TADDR)ip == +// PTR_HOST_MEMBER_TADDR(StubCallInstrs, pStubCallInstrs, m_op)) +// +// 3. Any time the amount of memory referenced through an address +// changes, such as by casting to a different type, a new ?PTR +// must be created. +// +// Host instances are created and stored based on both the target +// address and size of access. The access code has no way of knowing +// all possible ways that data will be retrieved for a given address +// so if code changes the way it accesses through an address a new +// ?PTR must be used, which may lead to a difference instance and +// different host address. This means that pointer identity does not hold +// across casts, so code like +// Class* cls = PTR_Class(addr); +// Class2* cls2 = PTR_Class2(addr); +// return cls == cls2; +// will fail because the host-side instances have no relation to each +// other. That isn't a problem, since by rule #1 you shouldn't be +// relying on specific host address values. +// +// Previous Code: +// +// return (ArrayClass *) m_pMethTab->GetClass(); +// +// Modified Code: +// +// return PTR_ArrayClass(m_pMethTab->GetClass()); +// +// The ?PTR templates have an implicit conversion from a host pointer +// to a target address, so the cast above constructs a new +// PTR_ArrayClass by implicitly converting the host pointer result +// from GetClass() to its target address and using that as the address +// of the new PTR_ArrayClass. As mentioned, the actual host-side +// pointer values may not be the same. +// +// Host pointer identity can be assumed as long as the type of access +// is the same. In the example above, if both accesses were of type +// Class then the host pointer will be the same, so it is safe to +// retrieve the target address of an instance and then later get +// a new host pointer for the target address using the same type as +// the host pointer in that case will be the same. This is enabled +// by caching all of the retrieved host instances. This cache is searched +// by the addr:size pair and when there's a match the existing instance +// is reused. This increases performance and also allows simple +// pointer identity to hold. It does mean that host memory grows +// in proportion to the amount of target memory being referenced, +// so retrieving extraneous data should be avoided. +// The host-side data cache grows until the Flush() method is called, +// at which point all host-side data is discarded. No host +// instance pointers should be held across a Flush(). +// +// Accessing into an object can lead to some unusual behavior. For +// example, the SList class relies on objects to contain an SLink +// instance that it uses for list maintenance. 
This SLink can be +// embedded anywhere in the larger object. The SList access is always +// purely to an SLink, so when using the access layer it will only +// retrieve an SLink's worth of data. The SList template will then +// do some address arithmetic to determine the start of the real +// object and cast the resulting pointer to the final object type. +// When using the access layer this results in a new ?PTR being +// created and used, so a new instance will result. The internal +// SLink instance will have no relation to the new object instance +// even though in target address terms one is embedded in the other. +// The assumption of data stability means that this won't cause +// a problem, but care must be taken with the address arithmetic, +// as layed out in rules #2 and #3. +// +// 4. Global address references cannot be used. Any reference to a +// global piece of code or data, such as a function address, global +// variable or class static variable, must be changed. +// +// The external access code may load at a different base address than +// the target process code. Global addresses are therefore not +// meaningful and must be replaced with something else. There isn't +// a single solution, so replacements must be done on a case-by-case +// basis. +// +// The simplest case is a global or class static variable. All +// declarations must be replaced with a special declaration that +// compiles into a modified accessor template value when compiled for +// external data access. Uses of the variable automatically are fixed +// up by the template instance. Note that assignment to the global +// must be independently ifdef'ed as the external access layer should +// not make any modifications. +// +// Macros allow for simple declaration of a class static and global +// values that compile into an appropriate templated value. +// +// Previous Code: +// +// static RangeSection* m_RangeTree; +// RangeSection* ExecutionManager::m_RangeTree; +// +// extern ThreadStore* g_pThreadStore; +// ThreadStore* g_pThreadStore = &StaticStore; +// class SystemDomain : public BaseDomain { +// ... +// ArrayListStatic m_appDomainIndexList; +// ... +// } +// +// SystemDomain::m_appDomainIndexList; +// +// extern DWORD gThreadTLSIndex; +// +// DWORD gThreadTLSIndex = TLS_OUT_OF_INDEXES; +// +// Modified Code: +// +// typedef DPTR(RangeSection) PTR_RangeSection; +// SPTR_DECL(RangeSection, m_RangeTree); +// SPTR_IMPL(RangeSection, ExecutionManager, m_RangeTree); +// +// typedef DPTR(ThreadStore) PTR_ThreadStore +// GPTR_DECL(ThreadStore, g_pThreadStore); +// GPTR_IMPL_INIT(ThreadStore, g_pThreadStore, &StaticStore); +// +// class SystemDomain : public BaseDomain { +// ... +// SVAL_DECL(ArrayListStatic; m_appDomainIndexList); +// ... +// } +// +// SVAL_IMPL(ArrayListStatic, SystemDomain, m_appDomainIndexList); +// +// GVAL_DECL(DWORD, gThreadTLSIndex); +// +// GVAL_IMPL_INIT(DWORD, gThreadTLSIndex, TLS_OUT_OF_INDEXES); +// +// When declaring the variable, the first argument declares the +// variable's type and the second argument declares the variable's +// name. When defining the variable the arguments are similar, with +// an extra class name parameter for the static class variable case. +// If an initializer is needed the IMPL_INIT macro should be used. +// +// Things get slightly more complicated when declaring an embedded +// array. In this case the data element is not a single element and +// therefore cannot be represented by a ?PTR. 
In the case of a global +// array, you should use the GARY_DECL and GARY_IMPL macros. +// We durrently have no support for declaring static array data members +// or initialized arrays. Array data members that are dynamically allocated +// need to be treated as pointer members. To reference individual elements +// you must use pointer arithmetic (see rule 2 above). An array declared +// as a local variable within a function does not need to be DACized. +// +// +// All uses of ?VAL_DECL must have a corresponding entry given in the +// DacGlobals structure in src\inc\dacvars.h. For SVAL_DECL the entry +// is class__name. For GVAL_DECL the entry is dac__name. You must add +// these entries in dacvars.h using the DEFINE_DACVAR macro. Note that +// these entries also are used for dumping memory in mini dumps and +// heap dumps. If it's not appropriate to dump a variable, (e.g., +// it's an array or some other value that is not important to have +// in a minidump) a second macro, DEFINE_DACVAR_NO_DUMP, will allow +// you to make the required entry in the DacGlobals structure without +// dumping its value. +// +// For convenience, here is a list of the various variable declaration and +// initialization macros: +// SVAL_DECL(type, name) static non-pointer data class MyClass +// member declared within { +// the class declaration // static int i; +// SVAL_DECL(int, i); +// } +// +// SVAL_IMPL(type, cls, name) static non-pointer data // int MyClass::i; +// member defined outside SVAL_IMPL(int, MyClass, i); +// the class declaration +// +// SVAL_IMPL_INIT(type, cls, static non-pointer data // int MyClass::i = 0; +// name, val) member defined and SVAL_IMPL_INIT(int, MyClass, i, 0); +// initialized outside the +// class declaration +// ------------------------------------------------------------------------------------------------ +// SPTR_DECL(type, name) static pointer data class MyClass +// member declared within { +// the class declaration // static int * pInt; +// SPTR_DECL(int, pInt); +// } +// +// SPTR_IMPL(type, cls, name) static pointer data // int * MyClass::pInt; +// member defined outside SPTR_IMPL(int, MyClass, pInt); +// the class declaration +// +// SPTR_IMPL_INIT(type, cls, static pointer data // int * MyClass::pInt = NULL; +// name, val) member defined and SPTR_IMPL_INIT(int, MyClass, pInt, NULL); +// initialized outside the +// class declaration +// ------------------------------------------------------------------------------------------------ +// GVAL_DECL(type, name) extern declaration of // extern int g_i +// global non-pointer GVAL_DECL(int, g_i); +// variable +// +// GVAL_IMPL(type, name) declaration of a // int g_i +// global non-pointer GVAL_IMPL(int, g_i); +// variable +// +// GVAL_IMPL_INIT (type, declaration and // int g_i = 0; +// name, initialization of a GVAL_IMPL_INIT(int, g_i, 0); +// val) global non-pointer +// variable +// ****Note**** +// If you use GVAL_? to declare a global variable of a structured type and you need to +// access a member of the type, you cannot use the dot operator. Instead, you must take the +// address of the variable and use the arrow operator. 
For example: +// struct MyStruct +// { +// int x; +// char ch; +// }; +// GVAL_IMPL(MyStruct, g_myStruct); +// int i = (&g_myStruct)->x; +// ------------------------------------------------------------------------------------------------ +// GPTR_DECL(type, name) extern declaration of // extern int * g_pInt +// global pointer GPTR_DECL(int, g_pInt); +// variable +// +// GPTR_IMPL(type, name) declaration of a // int * g_pInt +// global pointer GPTR_IMPL(int, g_pInt); +// variable +// +// GPTR_IMPL_INIT (type, declaration and // int * g_pInt = 0; +// name, initialization of a GPTR_IMPL_INIT(int, g_pInt, NULL); +// val) global pointer +// variable +// ------------------------------------------------------------------------------------------------ +// GARY_DECL(type, name) extern declaration of // extern int g_rgIntList[MAX_ELEMENTS]; +// a global array GARY_DECL(int, g_rgIntList, MAX_ELEMENTS); +// variable +// +// GARY_IMPL(type, name) declaration of a // int g_rgIntList[MAX_ELEMENTS]; +// global array GARY_IMPL(int, g_rgIntList, MAX_ELEMENTS); +// variable +// +// +// Certain pieces of code, such as the stack walker, rely on identifying +// an object from its vtable address. As the target vtable addresses +// do not necessarily correspond to the vtables used in the host, these +// references must be translated. The access layer maintains translation +// tables for all classes used with VPTR and can return the target +// vtable pointer for any host vtable in the known list of VPTR classes. +// +// ----- Errors: +// +// All errors in the access layer are reported via exceptions. The +// formal access layer methods catch all such exceptions and turn +// them into the appropriate error, so this generally isn't visible +// to users of the access layer. +// +// ----- DPTR Declaration: +// +// Create a typedef for the type with typedef DPTR(type) PTR_type; +// Replace type* with PTR_type. +// +// ----- VPTR Declaration: +// +// VPTR can only be used on classes that have a single vtable +// pointer at the beginning of the object. This should be true +// for a normal single-inheritance object. +// +// All of the classes that may be instantiated need to be identified +// and marked. In the base class declaration add either +// VPTR_BASE_VTABLE_CLASS if the class is abstract or +// VPTR_BASE_CONCRETE_VTABLE_CLASS if the class is concrete. In each +// derived class add VPTR_VTABLE_CLASS. If you end up with compile or +// link errors for an unresolved method called VPtrSize you missed a +// derived class declaration. +// +// As described above, dac can only handle classes with a single +// vtable. However, there's a special case for multiple inheritance +// situations when only one of the classes is needed for dac. If +// the base class needed is the first class in the derived class's +// layout then it can be used with dac via the VPTR_MULTI_CLASS +// macros. Use with extreme care. +// +// All classes to be instantiated must be listed in src\inc\vptr_list.h. +// +// Create a typedef for the type with typedef VPTR(type) PTR_type; +// When using a VPTR, replace Class* with PTR_Class. +// +// ----- Specific Macros: +// +// PTR_TO_TADDR(ptr) +// Retrieves the raw target address for a ?PTR. +// See code:dac_cast for the preferred alternative +// +// PTR_HOST_TO_TADDR(host) +// Given a host address of an instance produced by a ?PTR reference, +// return the original target address. The host address must +// be an exact match for an instance.
+// See code:dac_cast for the preferred alternative +// +// PTR_HOST_INT_TO_TADDR(host) +// Given a host address which resides somewhere within an instance +// produced by a ?PTR reference (a host interior pointer) return the +// corresponding target address. This is useful for evaluating +// relative pointers (e.g. RelativePointer) where calculating the +// target address requires knowledge of the target address of the +// relative pointer field itself. This lookup is slower than that for +// a non-interior host pointer so use it sparingly. +// +// VPTR_HOST_VTABLE_TO_TADDR(host) +// Given the host vtable pointer for a known VPTR class, return +// the target vtable pointer. +// +// PTR_HOST_MEMBER_TADDR(type, host, memb) +// Retrieves the target address of a host instance pointer and +// offsets it by the given member's offset within the type. +// +// PTR_HOST_INT_MEMBER_TADDR(type, host, memb) +// As above but will work for interior host pointers (see the +// description of PTR_HOST_INT_TO_TADDR for an explanation of host +// interior pointers). +// +// PTR_READ(addr, size) +// Reads a block of memory from the target and returns a host +// pointer for it. Useful for reading blocks of data from the target +// whose size is only known at runtime, such as raw code for a jitted +// method. If the data being read is actually an object, use SPTR +// instead to get better type semantics. +// +// DAC_EMPTY() +// DAC_EMPTY_ERR() +// DAC_EMPTY_RET(retVal) +// DAC_UNEXPECTED() +// Provides an empty method implementation when compiled +// for DACCESS_COMPILE. For example, use to stub out methods needed +// for vtable entries but otherwise unused. +// +// These macros are designed to turn into normal code when compiled +// without DACCESS_COMPILE. +// +//***************************************************************************** +// See code:EEStartup#TableOfContents for EE overview + +#ifndef __daccess_h__ +#define __daccess_h__ + +#ifndef __in +#include +#endif + +#define DACCESS_TABLE_RESOURCE L"COREXTERNALDATAACCESSRESOURCE" + +#include "type_traits.hpp" + +#ifdef DACCESS_COMPILE + +#include "safemath.h" + +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +typedef UInt64 UIntTarget; +#elif defined(TARGET_X86) +typedef UInt32 UIntTarget; +#elif defined(TARGET_ARM) +typedef UInt32 UIntTarget; +#else +#error unexpected target architecture +#endif + +// +// This version of things wraps pointer access in +// templates which understand how to retrieve data +// through an access layer. In this case no assumptions +// can be made that the current compilation processor or +// pointer types match the target's processor or pointer types. +// + +// Define TADDR as a non-pointer value so use of it as a pointer +// will not work properly. Define it as unsigned so +// pointer comparisons aren't affected by sign. +// This requires special casting to ULONG64 to sign-extend if necessary. +// XXX drewb - Cheating right now by not supporting cross-plat. +typedef UIntTarget TADDR; + +// TSIZE_T used for counts or ranges that need to span the size of a +// target pointer. For cross-plat, this may be different than SIZE_T +// which reflects the host pointer size. 
+typedef UIntTarget TSIZE_T; + +// Information stored in the DAC table of interest to the DAC implementation +// Note that this information is shared between all instantiations of ClrDataAccess, so initialize +// it just once in code:ClrDataAccess.GetDacGlobals (rather than use fields in ClrDataAccess); +struct DacTableInfo +{ + // On Windows, the first DWORD is the 32-bit timestamp read out of the runtime dll's debug directory. + // The remaining 3 DWORDS must all be 0. + // On Mac, this is the 16-byte UUID of the runtime dll. + // It is used to validate that mscorwks is the same version as mscordacwks + UInt32 dwID0; + UInt32 dwID1; + UInt32 dwID2; + UInt32 dwID3; +}; +extern DacTableInfo g_dacTableInfo; + +// +// The following table contains all the global information that data access needs to begin +// operation. All of the values stored here are RVAs. DacGlobalBase() returns the current +// base address to combine with to get a full target address. +// + +typedef struct _DacGlobals +{ +// These will define all of the dac related mscorwks static and global variables +// TODO: update DacTableGen to parse "UInt32" instead of "ULONG32" for the ids +#ifdef DAC_CLR_ENVIRONMENT +#define DEFINE_DACVAR(id_type, size, id) id_type id; +#define DEFINE_DACVAR_NO_DUMP(id_type, size, id) id_type id; +#else +#define DEFINE_DACVAR(id_type, size, id) UInt32 id; +#define DEFINE_DACVAR_NO_DUMP(id_type, size, id) UInt32 id; +#endif +#include "dacvars.h" +#undef DEFINE_DACVAR_NO_DUMP +#undef DEFINE_DACVAR + +/* + // Global functions. + ULONG fn__QueueUserWorkItemCallback; + ULONG fn__ThreadpoolMgr__AsyncCallbackCompletion; + ULONG fn__ThreadpoolMgr__AsyncTimerCallbackCompletion; + ULONG fn__DACNotifyCompilationFinished; +#ifdef HOST_X86 + ULONG fn__NativeDelayFixupAsmStub; + ULONG fn__NativeDelayFixupAsmStubRet; +#endif // HOST_X86 + ULONG fn__PInvokeCalliReturnFromCall; + ULONG fn__NDirectGenericStubReturnFromCall; + ULONG fn__DllImportForDelegateGenericStubReturnFromCall; +*/ + +} DacGlobals; + +extern DacGlobals g_dacGlobals; + +#ifdef __cplusplus +extern "C" { +#endif + +// These two functions are largely just for marking code +// that is not fully converted. DacWarning prints a debug +// message, while DacNotImpl throws a not-implemented exception. +void __cdecl DacWarning(__in __in_z char* format, ...); +void DacNotImpl(void); +void DacError(HRESULT err); +void __declspec(noreturn) DacError_NoRet(HRESULT err); +TADDR DacGlobalBase(void); +HRESULT DacReadAll(TADDR addr, void* buffer, uint32_t size, bool throwEx); +#ifdef DAC_CLR_ENVIRONMENT +HRESULT DacWriteAll(TADDR addr, PVOID buffer, ULONG32 size, bool throwEx); +HRESULT DacAllocVirtual(TADDR addr, ULONG32 size, + ULONG32 typeFlags, ULONG32 protectFlags, + bool throwEx, TADDR* mem); +HRESULT DacFreeVirtual(TADDR mem, ULONG32 size, ULONG32 typeFlags, + bool throwEx); + +#endif // DAC_CLR_ENVIRONMENT + +/* We are simulating a tiny bit of memory existing in the debuggee address space that really isn't there. + The memory appears to exist in the last 1KB of the memory space to make minimal risk that + it collides with any legitimate debuggee memory. When the DAC uses + DacInstantiateTypeByAddressHelper on these high addresses instead of getting back a pointer + in the DAC_INSTANCE cache it will get back a pointer to specifically configured block of + debugger memory. + + Rationale: + This method was invented to solve a problem when doing stack walking in the DAC. 
When + running in-process the register context has always been written to memory somewhere before + the stackwalker begins to operate. The stackwalker doesn't track the registers themselves, + but rather the storage locations where registers were written. + When the DAC runs the registers haven't been saved anywhere - there is no memory address + that refers to them. It would be easy to store the registers in the debugger's memory space + but the Regdisplay is typed as PTR_UIntNative, not UIntNative*. We could change REGDISPLAY + to point at debugger local addresses, but then we would have the opposite problem, being unable + to refer to stack addresses that are in the debuggee memory space. Options we could do: + 1) Add discriminant bits to REGDISPLAY fields to record whether the pointer is local or remote + a) Do it in the runtime definition - adds size and complexity to mrt100 for a debug only scenario + b) Do it only in the DAC definition - breaks marshalling for types that are or contain REGDISPLAY + (ie StackFrameIterator). + 2) Add a new DebuggerREGDISPLAY type that can hold local or remote addresses, and then create + parallel DAC stackwalking code that uses it. This is a bunch of work and + has higher maintenance cost to keep both code paths operational and functionally identical. + 3) Allocate space in debuggee that will be used to stash the registers when doing a debug stackwalk - + increases runtime working set for debug only scenario and won't work for dumps + 4) Same as #3, but don't actually allocate the space at runtime, just simulate that it was allocated + within the debugger - risk of colliding with real runtime allocations, adds complexity to the + DAC. + + #4 seems the best option to me, so we wound up here. +*/ + +// This address is picked to be very unlikely to collide with any real memory usage in the target +#define SIMULATED_DEBUGGEE_MEMORY_BASE_ADDRESS ((TADDR) -1024) +// The byte at ((TADDR)-1) isn't addressable at all, so we only have 1023 bytes of usable space +// At the moment we only need 256 bytes at most. +#define SIMULATED_DEBUGGEE_MEMORY_MAX_SIZE 1023 + +// Sets the simulated debuggee memory region, or clears it if pSimulatedDebuggeeMemory = NULL +// See large comment above for more details. +void SetSimulatedDebuggeeMemory(void* pSimulatedDebuggeeMemory, UInt32 cbSimulatedDebuggeeMemory); + +void* DacInstantiateTypeByAddress(TADDR addr, UInt32 size, bool throwEx); +void* DacInstantiateTypeByAddressNoReport(TADDR addr, UInt32 size, bool throwEx); +void* DacInstantiateClassByVTable(TADDR addr, UInt32 minSize, bool throwEx); + +// This method should not be used casually. Make sure simulatedTargetAddr does not cause collisions. See comment in dacfn.cpp for more details. +void* DacInstantiateTypeAtSimulatedAddress(TADDR simulatedTargetAddr, UInt32 size, void* pLocalBuffer, bool throwEx); + +// Copy a null-terminated ascii or unicode string from the target to the host. +// Note that most of the work here is to find the null terminator. If you know the exact length, +// then you can also just call DacInstantiateTypebyAddress. 
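// For example (illustrative): given the target address of a UTF-16 name,
//     wchar_t * wszName = DacInstantiateStringW(taName, 1024, true);
// marshals up to 1024 characters (stopping at the null terminator) into a host buffer.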
+char* DacInstantiateStringA(TADDR addr, UInt32 maxChars, bool throwEx); +wchar_t* DacInstantiateStringW(TADDR addr, UInt32 maxChars, bool throwEx); + +TADDR DacGetTargetAddrForHostAddr(const void* ptr, bool throwEx); +TADDR DacGetTargetAddrForHostInteriorAddr(const void* ptr, bool throwEx); +TADDR DacGetTargetVtForHostVt(const void* vtHost, bool throwEx); +wchar_t* DacGetVtNameW(TADDR targetVtable); + +// Report a region of memory to the debugger +void DacEnumMemoryRegion(TADDR addr, TSIZE_T size, bool fExpectSuccess = true); + +HRESULT DacWriteHostInstance(void * host, bool throwEx); + +#ifdef DAC_CLR_ENVIRONMENT + +// Occasionally it's necessary to allocate some host memory for +// instance data that's created on the fly and so doesn't directly +// correspond to target memory. These are held and freed on flush +// like other instances but can't be looked up by address. +PVOID DacAllocHostOnlyInstance(ULONG32 size, bool throwEx); + +// Determines whether ASSERTs should be raised when inconsistencies in the target are detected +bool DacTargetConsistencyAssertsEnabled(); + +// Host instances can be marked as they are enumerated in +// order to break cycles. This function returns true if +// the instance is already marked, otherwise it marks the +// instance and returns false. +bool DacHostPtrHasEnumMark(LPCVOID host); + +// Determines if EnumMemoryRegions has been called on a method descriptor. +// This helps perf for minidumps of apps with large managed stacks. +bool DacHasMethodDescBeenEnumerated(LPCVOID pMD); + +// Sets a flag indicating that EnumMemoryRegions on a method desciptor +// has been successfully called. The function returns true if +// this flag had been previously set. +bool DacSetMethodDescEnumerated(LPCVOID pMD); + +// Determines if a method descriptor is valid +BOOL DacValidateMD(LPCVOID pMD); + +// Enumerate the instructions around a call site to help debugger stack walking heuristics +void DacEnumCodeForStackwalk(TADDR taCallEnd); + +// Given the address and the size of a memory range which is stored in the buffer, replace all the patches +// in the buffer with the real opcodes. This is especially important on X64 where the unwinder needs to +// disassemble the native instructions. +class MemoryRange; +HRESULT DacReplacePatchesInHostMemory(MemoryRange range, PVOID pBuffer); + +// +// Convenience macros for EnumMemoryRegions implementations. +// + +// Enumerate the given host instance and return +// true if the instance hasn't already been enumerated. +#define DacEnumHostDPtrMem(host) \ + (!DacHostPtrHasEnumMark(host) ? \ + (DacEnumMemoryRegion(PTR_HOST_TO_TADDR(host), sizeof(*host)), \ + true) : false) +#define DacEnumHostSPtrMem(host, type) \ + (!DacHostPtrHasEnumMark(host) ? \ + (DacEnumMemoryRegion(PTR_HOST_TO_TADDR(host), \ + type::DacSize(PTR_HOST_TO_TADDR(host))), \ + true) : false) +#define DacEnumHostVPtrMem(host) \ + (!DacHostPtrHasEnumMark(host) ? \ + (DacEnumMemoryRegion(PTR_HOST_TO_TADDR(host), (host)->VPtrSize()), \ + true) : false) + +// Check enumeration of 'this' and return if this has already been +// enumerated. Making this the first line of an object's EnumMemoryRegions +// method will prevent cycles. 
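// For example (illustrative sketch; MyNode is a hypothetical marshalled type):
//     void MyNode::EnumMemoryRegions()
//     {
//         DAC_ENUM_DTHIS();                    // report 'this' and bail out if it was already enumerated
//         if (m_pNext != NULL)
//             m_pNext->EnumMemoryRegions();    // then walk reachable instances
//     }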
+#define DAC_CHECK_ENUM_THIS() \ + if (DacHostPtrHasEnumMark(this)) return +#define DAC_ENUM_DTHIS() \ + if (!DacEnumHostDPtrMem(this)) return +#define DAC_ENUM_STHIS(type) \ + if (!DacEnumHostSPtrMem(this, type)) return +#define DAC_ENUM_VTHIS() \ + if (!DacEnumHostVPtrMem(this)) return + +#endif // DAC_CLR_ENVIRONMENT + +#ifdef __cplusplus +} + +// +// Computes (taBase + (dwIndex * dwElementSize()), with overflow checks. +// +// Arguments: +// taBase the base TADDR value +// dwIndex the index of the offset +// dwElementSize the size of each element (to multiply the offset by) +// +// Return value: +// The resulting TADDR, or throws CORDB_E_TARGET_INCONSISTENT on overlow. +// +// Notes: +// The idea here is that overflows during address arithmetic suggest that we're operating on corrupt +// pointers. It helps to improve reliability to detect the cases we can (like overflow) and fail. Note +// that this is just a heuristic, not a security measure. We can't trust target data regardless - +// failing on overflow is just one easy case of corruption to detect. There is no need to use checked +// arithmetic everywhere in the DAC infrastructure, this is intended just for the places most likely to +// help catch bugs (eg. __DPtr::operator[]). +// +inline TADDR DacTAddrOffset( TADDR taBase, TSIZE_T dwIndex, TSIZE_T dwElementSize ) +{ +#ifdef DAC_CLR_ENVIRONMENT + ClrSafeInt t(taBase); + t += ClrSafeInt(dwIndex) * ClrSafeInt(dwElementSize); + if( t.IsOverflow() ) + { + // Pointer arithmetic overflow - probably due to corrupt target data + //DacError(CORDBG_E_TARGET_INCONSISTENT); + DacError(E_FAIL); + } + return t.Value(); +#else // TODO: port safe math + return taBase + (dwIndex*dwElementSize); +#endif +} + +// Base pointer wrapper which provides common behavior. +class __TPtrBase +{ +public: + __TPtrBase() + { + // Make uninitialized pointers obvious. + m_addr = (TADDR)-1; + } + explicit __TPtrBase(TADDR addr) + { + m_addr = addr; + } + + bool operator!() const + { + return m_addr == 0; + } + // We'd like to have an implicit conversion to bool here since the C++ + // standard says all pointer types are implicitly converted to bool. + // Unfortunately, that would cause ambiguous overload errors for uses + // of operator== and operator!=. Instead callers will have to compare + // directly against NULL. 
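// For example (illustrative): with PTR_MyType p, test the wrapped address with
//     if (p != NULL) { ... }    // uses operator!=(TADDR) below
// or
//     if (!p) { ... }           // uses operator! above
// rather than relying on an implicit conversion to bool, which is deliberately not provided.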
+ + bool operator==(TADDR addr) const + { + return m_addr == addr; + } + bool operator!=(TADDR addr) const + { + return m_addr != addr; + } + bool operator<(TADDR addr) const + { + return m_addr < addr; + } + bool operator>(TADDR addr) const + { + return m_addr > addr; + } + bool operator<=(TADDR addr) const + { + return m_addr <= addr; + } + bool operator>=(TADDR addr) const + { + return m_addr >= addr; + } + + TADDR GetAddr(void) const + { + return m_addr; + } + TADDR SetAddr(TADDR addr) + { + m_addr = addr; + return addr; + } + +protected: + TADDR m_addr; +}; + +// Adds comparison operations +// Its possible we just want to merge these into __TPtrBase, but SPtr isn't comparable with +// other types right now and I would rather stay conservative +class __ComparableTPtrBase : public __TPtrBase +{ +protected: + __ComparableTPtrBase(void) : __TPtrBase() + {} + + explicit __ComparableTPtrBase(TADDR addr) : __TPtrBase(addr) + {} + +public: + bool operator==(const __ComparableTPtrBase& ptr) const + { + return m_addr == ptr.m_addr; + } + bool operator!=(const __ComparableTPtrBase& ptr) const + { + return !operator==(ptr); + } + bool operator<(const __ComparableTPtrBase& ptr) const + { + return m_addr < ptr.m_addr; + } + bool operator>(const __ComparableTPtrBase& ptr) const + { + return m_addr > ptr.m_addr; + } + bool operator<=(const __ComparableTPtrBase& ptr) const + { + return m_addr <= ptr.m_addr; + } + bool operator>=(const __ComparableTPtrBase& ptr) const + { + return m_addr >= ptr.m_addr; + } +}; + +// Pointer wrapper base class for various forms of normal data. +// This has the common functionality between __DPtr and __ArrayDPtr. +// The DPtrType type parameter is the actual derived type in use. This is necessary so that +// inhereted functions preserve exact return types. +template +class __DPtrBase : public __ComparableTPtrBase +{ +public: + typedef type _Type; + typedef type* _Ptr; + +protected: + // Constructors + // All protected - this type should not be used directly - use one of the derived types instead. + __DPtrBase< type, DPtrType >(void) : __ComparableTPtrBase() + {} + + explicit __DPtrBase< type, DPtrType >(TADDR addr) : __ComparableTPtrBase(addr) + {} + + explicit __DPtrBase(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + explicit __DPtrBase(type const * host) + { + m_addr = DacGetTargetAddrForHostAddr(host, true); + } + +public: + DPtrType& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.GetAddr(); + return DPtrType(m_addr); + } + DPtrType& operator=(TADDR addr) + { + m_addr = addr; + return DPtrType(m_addr); + } + + type& operator*(void) const + { + return *(type*)DacInstantiateTypeByAddress(m_addr, sizeof(type), true); + } + + + using __ComparableTPtrBase::operator==; + using __ComparableTPtrBase::operator!=; + using __ComparableTPtrBase::operator<; + using __ComparableTPtrBase::operator>; + using __ComparableTPtrBase::operator<=; + using __ComparableTPtrBase::operator>=; + bool operator==(TADDR addr) const + { + return m_addr == addr; + } + bool operator!=(TADDR addr) const + { + return m_addr != addr; + } + + // Array index operator + // we want an operator[] for all possible numeric types (rather than rely on + // implicit numeric conversions on the argument) to prevent ambiguity with + // DPtr's implicit conversion to type* and the built-in operator[]. + // @dbgtodo rbyers: we could also use this technique to simplify other operators below. + template + type& operator[](indexType index) + { + // Compute the address of the element. 
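        // For non-negative indexes this goes through DacTAddrOffset so that overflow in
        // m_addr + index * sizeof(type) fails with DacError rather than silently wrapping.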
+ TADDR elementAddr; + if( index >= 0 ) + { + elementAddr = DacTAddrOffset(m_addr, index, sizeof(type)); + } + else + { + // Don't bother trying to do overflow checking for negative indexes - they are rare compared to + // positive ones. ClrSafeInt doesn't support signed datatypes yet (although we should be able to add it + // pretty easily). + elementAddr = m_addr + index * sizeof(type); + } + + // Marshal over a single instance and return a reference to it. + return *(type*) DacInstantiateTypeByAddress(elementAddr, sizeof(type), true); + } + + template + type const & operator[](indexType index) const + { + return (*const_cast<__DPtrBase*>(this))[index]; + } + + //------------------------------------------------------------------------- + // operator+ + + DPtrType operator+(unsigned short val) + { + return DPtrType(DacTAddrOffset(m_addr, val, sizeof(type))); + } + DPtrType operator+(short val) + { + return DPtrType(m_addr + val * sizeof(type)); + } + // size_t is unsigned int on Win32, so we need + // to ifdef here to make sure the unsigned int + // and size_t overloads don't collide. size_t + // is marked __w64 so a simple unsigned int + // will not work on Win32, it has to be size_t. + DPtrType operator+(size_t val) + { + return DPtrType(DacTAddrOffset(m_addr, val, sizeof(type))); + } +#if (!defined (HOST_X86) && !defined(_SPARC_) && !defined(HOST_ARM)) || (defined(HOST_X86) && defined(__APPLE__)) + DPtrType operator+(unsigned int val) + { + return DPtrType(DacTAddrOffset(m_addr, val, sizeof(type))); + } +#endif // (!defined (HOST_X86) && !defined(_SPARC_) && !defined(HOST_ARM)) || (defined(HOST_X86) && defined(__APPLE__)) + DPtrType operator+(int val) + { + return DPtrType(m_addr + val * sizeof(type)); + } +#ifndef TARGET_UNIX // for now, everything else is 32 bit + DPtrType operator+(unsigned long val) + { + return DPtrType(DacTAddrOffset(m_addr, val, sizeof(type))); + } + DPtrType operator+(long val) + { + return DPtrType(m_addr + val * sizeof(type)); + } +#endif // !TARGET_UNIX // for now, everything else is 32 bit +#if !defined(HOST_ARM) && !defined(HOST_X86) + DPtrType operator+(IntNative val) + { + return DPtrType(m_addr + val * sizeof(type)); + } +#endif + + //------------------------------------------------------------------------- + // operator- + + DPtrType operator-(unsigned short val) + { + return DPtrType(m_addr - val * sizeof(type)); + } + DPtrType operator-(short val) + { + return DPtrType(m_addr - val * sizeof(type)); + } + // size_t is unsigned int on Win32, so we need + // to ifdef here to make sure the unsigned int + // and size_t overloads don't collide. size_t + // is marked __w64 so a simple unsigned int + // will not work on Win32, it has to be size_t. 
+ DPtrType operator-(size_t val) + { + return DPtrType(m_addr - val * sizeof(type)); + } + DPtrType operator-(signed __int64 val) + { + return DPtrType(m_addr - val * sizeof(type)); + } +#if !defined (HOST_X86) && !defined(_SPARC_) && !defined(HOST_ARM) + DPtrType operator-(unsigned int val) + { + return DPtrType(m_addr - val * sizeof(type)); + } +#endif // !defined (HOST_X86) && !defined(_SPARC_) && !defined(HOST_ARM) + DPtrType operator-(int val) + { + return DPtrType(m_addr - val * sizeof(type)); + } +#ifdef _MSC_VER // for now, everything else is 32 bit + DPtrType operator-(unsigned long val) + { + return DPtrType(m_addr - val * sizeof(type)); + } + DPtrType operator-(long val) + { + return DPtrType(m_addr - val * sizeof(type)); + } +#endif // _MSC_VER // for now, everything else is 32 bit + size_t operator-(const DPtrType& val) + { + return (size_t)((m_addr - val.m_addr) / sizeof(type)); + } + + //------------------------------------------------------------------------- + + DPtrType& operator+=(size_t val) + { + m_addr += val * sizeof(type); + return static_cast(*this); + } + DPtrType& operator-=(size_t val) + { + m_addr -= val * sizeof(type); + return static_cast(*this); + } + + DPtrType& operator++() + { + m_addr += sizeof(type); + return static_cast(*this); + } + DPtrType& operator--() + { + m_addr -= sizeof(type); + return static_cast(*this); + } + DPtrType operator++(int postfix) + { + UNREFERENCED_PARAMETER(postfix); + DPtrType orig = DPtrType(*this); + m_addr += sizeof(type); + return orig; + } + DPtrType operator--(int postfix) + { + UNREFERENCED_PARAMETER(postfix); + DPtrType orig = DPtrType(*this); + m_addr -= sizeof(type); + return orig; + } + + bool IsValid(void) const + { + return m_addr && + DacInstantiateTypeByAddress(m_addr, sizeof(type), + false) != NULL; + } + void EnumMem(void) const + { + DacEnumMemoryRegion(m_addr, sizeof(type)); + } +}; + +// Pointer wrapper for objects which are just plain data +// and need no special handling. +template +class __DPtr : public __DPtrBase > +{ +#ifdef __GNUC__ +protected: + //there seems to be a bug in GCC's inference logic. It can't find m_addr. + using __DPtrBase >::m_addr; +#endif // __GNUC__ +public: + // constructors - all chain to __DPtrBase constructors + __DPtr< type >(void) : __DPtrBase >() {} + __DPtr< type >(TADDR addr) : __DPtrBase >(addr) {} + + // construct const from non-const + typedef typename type_traits::remove_const::type mutable_type; + __DPtr< type >(__DPtr const & rhs) : __DPtrBase >(rhs.GetAddr()) {} + + explicit __DPtr< type >(__TPtrBase addr) : __DPtrBase >(addr) {} + explicit __DPtr< type >(type const * host) : __DPtrBase >(host) {} + + operator type*() const + { + return (type*)DacInstantiateTypeByAddress(m_addr, sizeof(type), true); + } + type* operator->() const + { + return (type*)DacInstantiateTypeByAddress(m_addr, sizeof(type), true); + } +}; + +#define DPTR(type) __DPtr< type > + +// A restricted form of DPtr that doesn't have any conversions to pointer types. +// This is useful for pointer types that almost always represent arrays, as opposed +// to pointers to single instances (eg. PTR_BYTE). In these cases, allowing implicit +// conversions to (for eg.) BYTE* would usually result in incorrect usage (eg. pointer +// arithmetic and array indexing), since only a single instance has been marshalled to the host. +// If you really must marshal a single instance (eg. converting T* to PTR_T is too painful for now), +// then use code:DacUnsafeMarshalSingleElement so we can identify such unsafe code. 
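// For example (illustrative): with ArrayDPTR(UInt8) pData, each pData[i] marshals that
// element from the target on demand; an implicit conversion to UInt8* would instead hand
// back a host copy of just one element, making pointer arithmetic on it silently wrong.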
+template +class __ArrayDPtr : public __DPtrBase > +{ +public: + // constructors - all chain to __DPtrBase constructors + __ArrayDPtr< type >(void) : __DPtrBase >() {} + __ArrayDPtr< type >(TADDR addr) : __DPtrBase >(addr) {} + + // construct const from non-const + typedef typename type_traits::remove_const::type mutable_type; + __ArrayDPtr< type >(__ArrayDPtr const & rhs) : __DPtrBase >(rhs.GetAddr()) {} + + explicit __ArrayDPtr< type >(__TPtrBase addr) : __DPtrBase >(addr) {} + + // Note that there is also no explicit constructor from host instances (type*). + // Going this direction is less problematic, but often still represents risky coding. +}; + +#define ArrayDPTR(type) __ArrayDPtr< type > + + +// Pointer wrapper for objects which are just plain data +// but whose size is not the same as the base type size. +// This can be used for prefetching data for arrays or +// for cases where an object has a variable size. +template +class __SPtr : public __TPtrBase +{ +public: + typedef type _Type; + typedef type* _Ptr; + + __SPtr< type >(void) : __TPtrBase() {} + __SPtr< type >(TADDR addr) : __TPtrBase(addr) {} + explicit __SPtr< type >(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + explicit __SPtr< type >(type* host) + { + m_addr = DacGetTargetAddrForHostAddr(host, true); + } + + __SPtr< type >& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.m_addr; + return *this; + } + __SPtr< type >& operator=(TADDR addr) + { + m_addr = addr; + return *this; + } + + operator type*() const + { + if (m_addr) + { + return (type*)DacInstantiateTypeByAddress(m_addr, + type::DacSize(m_addr), + true); + } + else + { + return (type*)NULL; + } + } + type* operator->() const + { + if (m_addr) + { + return (type*)DacInstantiateTypeByAddress(m_addr, + type::DacSize(m_addr), + true); + } + else + { + return (type*)NULL; + } + } + type& operator*(void) const + { + if (!m_addr) + { + DacError(E_INVALIDARG); + } + + return *(type*)DacInstantiateTypeByAddress(m_addr, + type::DacSize(m_addr), + true); + } + + bool IsValid(void) const + { + return m_addr && + DacInstantiateTypeByAddress(m_addr, type::DacSize(m_addr), + false) != NULL; + } + void EnumMem(void) const + { + if (m_addr) + { + DacEnumMemoryRegion(m_addr, type::DacSize(m_addr)); + } + } +}; + +#define SPTR(type) __SPtr< type > + +// Pointer wrapper for objects which have a single leading +// vtable, such as objects in a single-inheritance tree. +// The base class of all such trees must have use +// VPTR_BASE_VTABLE_CLASS in their declaration and all +// instantiable members of the tree must be listed in vptr_list.h. +template +class __VPtr : public __TPtrBase +{ +public: + // VPtr::_Type has to be a pointer as + // often the type is an abstract class. + // This type is not expected to be used anyway. 
+ typedef type* _Type; + typedef type* _Ptr; + + __VPtr< type >(void) : __TPtrBase() {} + __VPtr< type >(TADDR addr) : __TPtrBase(addr) {} + explicit __VPtr< type >(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + explicit __VPtr< type >(type* host) + { + m_addr = DacGetTargetAddrForHostAddr(host, true); + } + + __VPtr< type >& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.m_addr; + return *this; + } + __VPtr< type >& operator=(TADDR addr) + { + m_addr = addr; + return *this; + } + + operator type*() const + { + return (type*)DacInstantiateClassByVTable(m_addr, sizeof(type), true); + } + type* operator->() const + { + return (type*)DacInstantiateClassByVTable(m_addr, sizeof(type), true); + } + + bool operator==(const __VPtr< type >& ptr) const + { + return m_addr == ptr.m_addr; + } + bool operator==(TADDR addr) const + { + return m_addr == addr; + } + bool operator!=(const __VPtr< type >& ptr) const + { + return !operator==(ptr); + } + bool operator!=(TADDR addr) const + { + return m_addr != addr; + } + + bool IsValid(void) const + { + return m_addr && + DacInstantiateClassByVTable(m_addr, sizeof(type), false) != NULL; + } + void EnumMem(void) const + { + if (IsValid()) + { + DacEnumMemoryRegion(m_addr, (operator->())->VPtrSize()); + } + } +}; + +#define VPTR(type) __VPtr< type > + +// Pointer wrapper for 8-bit strings. +#ifdef DAC_CLR_ENVIRONMENT +template +#else +template +#endif +class __Str8Ptr : public __DPtr +{ +public: + typedef type _Type; + typedef type* _Ptr; + + __Str8Ptr< type, maxChars >(void) : __DPtr() {} + __Str8Ptr< type, maxChars >(TADDR addr) : __DPtr(addr) {} + explicit __Str8Ptr< type, maxChars >(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + explicit __Str8Ptr< type, maxChars >(type* host) + { + m_addr = DacGetTargetAddrForHostAddr(host, true); + } + + __Str8Ptr< type, maxChars >& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.m_addr; + return *this; + } + __Str8Ptr< type, maxChars >& operator=(TADDR addr) + { + m_addr = addr; + return *this; + } + + operator type*() const + { + return (type*)DacInstantiateStringA(m_addr, maxChars, true); + } + + bool IsValid(void) const + { + return m_addr && + DacInstantiateStringA(m_addr, maxChars, false) != NULL; + } + void EnumMem(void) const + { + char* str = DacInstantiateStringA(m_addr, maxChars, false); + if (str) + { + DacEnumMemoryRegion(m_addr, strlen(str) + 1); + } + } +}; + +#define S8PTR(type) __Str8Ptr< type > +#define S8PTRMAX(type, maxChars) __Str8Ptr< type, maxChars > + +// Pointer wrapper for 16-bit strings. 
+#ifdef DAC_CLR_ENVIRONMENT +template +#else +template +#endif +class __Str16Ptr : public __DPtr +{ +public: + typedef type _Type; + typedef type* _Ptr; + + __Str16Ptr< type, maxChars >(void) : __DPtr() {} + __Str16Ptr< type, maxChars >(TADDR addr) : __DPtr(addr) {} + explicit __Str16Ptr< type, maxChars >(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + explicit __Str16Ptr< type, maxChars >(type* host) + { + m_addr = DacGetTargetAddrForHostAddr(host, true); + } + + __Str16Ptr< type, maxChars >& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.m_addr; + return *this; + } + __Str16Ptr< type, maxChars >& operator=(TADDR addr) + { + m_addr = addr; + return *this; + } + + operator type*() const + { + return (type*)DacInstantiateStringW(m_addr, maxChars, true); + } + + bool IsValid(void) const + { + return m_addr && + DacInstantiateStringW(m_addr, maxChars, false) != NULL; + } + void EnumMem(void) const + { + char* str = DacInstantiateStringW(m_addr, maxChars, false); + if (str) + { + DacEnumMemoryRegion(m_addr, strlen(str) + 1); + } + } +}; + +#define S16PTR(type) __Str16Ptr< type > +#define S16PTRMAX(type, maxChars) __Str16Ptr< type, maxChars > + +template +class __GlobalVal +{ +public: +#ifdef DAC_CLR_ENVIRONMENT + __GlobalVal< type >(PULONG rvaPtr) +#else + __GlobalVal< type >(UInt32* rvaPtr) +#endif + { + m_rvaPtr = rvaPtr; + } + + operator type() const + { + return (type)*__DPtr< type >(DacGlobalBase() + *m_rvaPtr); + } + + __DPtr< type > operator&() const + { + return __DPtr< type >(DacGlobalBase() + *m_rvaPtr); + } + + // @dbgtodo rbyers dac support: This updates values in the host. This seems extremely dangerous + // to do silently. I'd prefer that a specific (searchable) write function + // was used. Try disabling this and see what fails... + type & operator=(type & val) + { + type* ptr = __DPtr< type >(DacGlobalBase() + *m_rvaPtr); + // Update the host copy; + *ptr = val; + // Write back to the target. + DacWriteHostInstance(ptr, true); + return val; + } + + bool IsValid(void) const + { + return __DPtr< type >(DacGlobalBase() + *m_rvaPtr).IsValid(); + } + void EnumMem(void) const + { + TADDR p = DacGlobalBase() + *m_rvaPtr; + __DPtr< type >(p).EnumMem(); + } + +private: +#ifdef DAC_CLR_ENVIRONMENT + PULONG m_rvaPtr; +#else + UInt32* m_rvaPtr; +#endif +}; + +template +class __GlobalArray +{ +public: +#ifdef DAC_CLR_ENVIRONMENT + __GlobalArray< type, size >(PULONG rvaPtr) +#else + __GlobalArray< type, size >(UInt32* rvaPtr) +#endif + { + m_rvaPtr = rvaPtr; + } + + __DPtr< type > operator&() const + { + return __DPtr< type >(DacGlobalBase() + *m_rvaPtr); + } + + type& operator[](unsigned int index) const + { + return __DPtr< type >(DacGlobalBase() + *m_rvaPtr)[index]; + } + + bool IsValid(void) const + { + // Only validates the base pointer, not the full array range. 
+ return __DPtr< type >(DacGlobalBase() + *m_rvaPtr).IsValid(); + } + void EnumMem(void) const + { + DacEnumMemoryRegion(DacGlobalBase() + *m_rvaPtr, sizeof(type) * size); + } + +private: +#ifdef DAC_CLR_ENVIRONMENT + PULONG m_rvaPtr; +#else + UInt32* m_rvaPtr; +#endif +}; + +template +class __GlobalPtr +{ +public: +#ifdef DAC_CLR_ENVIRONMENT + __GlobalPtr< acc_type, store_type >(PULONG rvaPtr) +#else + __GlobalPtr< acc_type, store_type >(UInt32* rvaPtr) +#endif + { + m_rvaPtr = rvaPtr; + } + + __DPtr< store_type > operator&() const + { + return __DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + } + + store_type & operator=(store_type & val) + { + store_type* ptr = __DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + // Update the host copy; + *ptr = val; + // Write back to the target. + DacWriteHostInstance(ptr, true); + return val; + } + + acc_type operator->() const + { + return (acc_type)*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + } + operator acc_type() const + { + return (acc_type)*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + } + operator store_type() const + { + return *__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + } + bool operator!() const + { + return !*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr); + } + + typename store_type operator[](int index) const + { + return (*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr))[index]; + } + + typename store_type operator[](unsigned int index) const + { + return (*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr))[index]; + } + + TADDR GetAddr() const + { + return (*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr)).GetAddr(); + } + + TADDR GetAddrRaw () const + { + return DacGlobalBase() + *m_rvaPtr; + } + + // This is only testing the the pointer memory is available but does not verify + // the memory that it points to. + // + bool IsValidPtr(void) const + { + return __DPtr< store_type >(DacGlobalBase() + *m_rvaPtr).IsValid(); + } + + bool IsValid(void) const + { + return __DPtr< store_type >(DacGlobalBase() + *m_rvaPtr).IsValid() && + (*__DPtr< store_type >(DacGlobalBase() + *m_rvaPtr)).IsValid(); + } + void EnumMem(void) const + { + __DPtr< store_type > ptr(DacGlobalBase() + *m_rvaPtr); + ptr.EnumMem(); + if (ptr.IsValid()) + { + (*ptr).EnumMem(); + } + } + +#ifdef DAC_CLR_ENVIRONMENT + PULONG m_rvaPtr; +#else + UInt32* m_rvaPtr; +#endif +}; + +template +inline bool operator==(const __GlobalPtr& gptr, + acc_type host) +{ + return DacGetTargetAddrForHostAddr(host, true) == + *__DPtr< TADDR >(DacGlobalBase() + *gptr.m_rvaPtr); +} +template +inline bool operator!=(const __GlobalPtr& gptr, + acc_type host) +{ + return !operator==(gptr, host); +} + +template +inline bool operator==(acc_type host, + const __GlobalPtr& gptr) +{ + return DacGetTargetAddrForHostAddr(host, true) == + *__DPtr< TADDR >(DacGlobalBase() + *gptr.m_rvaPtr); +} +template +inline bool operator!=(acc_type host, + const __GlobalPtr& gptr) +{ + return !operator==(host, gptr); +} + + +// +// __VoidPtr is a type that behaves like void* but for target pointers. +// Behavior of PTR_VOID: +// * has void* semantics. Will compile to void* in non-DAC builds (just like +// other PTR types. Unlike TADDR, we want pointer semantics. +// * NOT assignable from host pointer types or convertible to host pointer +// types - ensures we can't confuse host and target pointers (we'll get +// compiler errors if we try and cast between them). 
+// * like void*, no pointer arithmetic or dereferencing is allowed +// * like TADDR, can be used to construct any __DPtr / __VPtr instance +// * representation is the same as a void* (for marshalling / casting) +// +// One way in which __VoidPtr is unlike void* is that it can't be cast to +// pointer or integer types. On the one hand, this is a good thing as it forces +// us to keep target pointers separate from other data types. On the other hand +// in practice this means we have to use dac_cast in places where we used +// to use a (TADDR) cast. Unfortunately C++ provides us no way to allow the +// explicit cast to primitive types without also allowing implicit conversions. +// +// This is very similar in spirit to TADDR. The primary difference is that +// PTR_VOID has pointer semantics, where TADDR has integer semantics. When +// dacizing uses of void* to TADDR, casts must be inserted everywhere back to +// pointer types. If we switch a use of TADDR to PTR_VOID, those casts in +// DACCESS_COMPILE regions no longer compile (see above). Also, TADDR supports +// pointer arithmetic, but that might not be necessary (could use PTR_BYTE +// instead etc.). Ideally we'd probably have just one type for this purpose +// (named TADDR but with the semantics of PTR_VOID), but outright conversion +// would require too much work. +// + +template <> +class __DPtr : public __ComparableTPtrBase +{ +public: + __DPtr(void) : __ComparableTPtrBase() {} + __DPtr(TADDR addr) : __ComparableTPtrBase(addr) {} + + // Note, unlike __DPtr, this ctor form is not explicit. We allow implicit + // conversions from any pointer type (just like for void*). + __DPtr(__TPtrBase addr) + { + m_addr = addr.GetAddr(); + } + + // Like TPtrBase, VoidPtrs can also be created impicitly from all GlobalPtrs + template + __DPtr(__GlobalPtr globalPtr) + { + m_addr = globalPtr.GetAddr(); + } + + // Note, unlike __DPtr, there is no explicit conversion from host pointer + // types. Since void* cannot be marshalled, there is no such thing as + // a void* DAC instance in the host. + + // Also, we don't want an implicit conversion to TADDR because then the + // compiler will allow pointer arithmetic (which it wouldn't allow for + // void*). Instead, callers can use dac_cast if they want. + + // Note, unlike __DPtr, any pointer type can be assigned to a __DPtr + // This is to mirror the assignability of any pointer type to a void* + __DPtr& operator=(const __TPtrBase& ptr) + { + m_addr = ptr.GetAddr(); + return *this; + } + __DPtr& operator=(TADDR addr) + { + m_addr = addr; + return *this; + } + + // note, no marshalling operators (type* conversion, operator ->, operator*) + // A void* can't be marshalled because we don't know how much to copy + + // PTR_Void can be compared to any other pointer type (because conceptually, + // any other pointer type should be implicitly convertible to void*) + using __ComparableTPtrBase::operator==; + using __ComparableTPtrBase::operator!=; + using __ComparableTPtrBase::operator<; + using __ComparableTPtrBase::operator>; + using __ComparableTPtrBase::operator<=; + using __ComparableTPtrBase::operator>=; + bool operator==(TADDR addr) const + { + return m_addr == addr; + } + bool operator!=(TADDR addr) const + { + return m_addr != addr; + } +}; + +typedef __DPtr __VoidPtr; +typedef __VoidPtr PTR_VOID; +typedef DPTR(PTR_VOID) PTR_PTR_VOID; + +// For now we treat pointers to const and non-const void the same in DAC +// builds. 
In general, DAC is read-only anyway and so there isn't a danger of +// writing to these pointers. Also, the non-dac builds will ensure +// const-correctness. However, if we wanted to support true void* / const void* +// behavior, we could probably build the follow functionality by templating +// __VoidPtr: +// * A PTR_VOID would be implicitly convertable to PTR_CVOID +// * An explicit coercion (ideally const_cast) would be required to convert a +// PTR_CVOID to a PTR_VOID +// * Similarily, an explicit coercion would be required to convert a cost PTR +// type (eg. PTR_CBYTE) to a PTR_VOID. +typedef __VoidPtr PTR_CVOID; + + +// The special empty ctor declared here allows the whole +// class hierarchy to be instantiated easily by the +// external access code. The actual class body will be +// read externally so no members should be initialized. + +// Safe access for retrieving the target address of a PTR. +#define PTR_TO_TADDR(ptr) ((ptr).GetAddr()) + +#define GFN_TADDR(name) (DacGlobalBase() + g_dacGlobals.fn__ ## name) + +// ROTORTODO - g++ 3 doesn't like the use of the operator& in __GlobalVal +// here. Putting GVAL_ADDR in to get things to compile while I discuss +// this matter with the g++ authors. + +#define GVAL_ADDR(g) \ + ((g).operator&()) + +// +// References to class static and global data. +// These all need to be redirected through the global +// data table. +// + +#define _SPTR_DECL(acc_type, store_type, var) \ + static __GlobalPtr< acc_type, store_type > var +#define _SPTR_IMPL(acc_type, store_type, cls, var) \ + __GlobalPtr< acc_type, store_type > cls::var(&g_dacGlobals.cls##__##var) +#define _SPTR_IMPL_INIT(acc_type, store_type, cls, var, init) \ + __GlobalPtr< acc_type, store_type > cls::var(&g_dacGlobals.cls##__##var) +#define _SPTR_IMPL_NS(acc_type, store_type, ns, cls, var) \ + __GlobalPtr< acc_type, store_type > cls::var(&g_dacGlobals.ns##__##cls##__##var) +#define _SPTR_IMPL_NS_INIT(acc_type, store_type, ns, cls, var, init) \ + __GlobalPtr< acc_type, store_type > cls::var(&g_dacGlobals.ns##__##cls##__##var) + +#define _GPTR_DECL(acc_type, store_type, var) \ + extern __GlobalPtr< acc_type, store_type > var +#define _GPTR_IMPL(acc_type, store_type, var) \ + __GlobalPtr< acc_type, store_type > var(&g_dacGlobals.dac__##var) +#define _GPTR_IMPL_INIT(acc_type, store_type, var, init) \ + __GlobalPtr< acc_type, store_type > var(&g_dacGlobals.dac__##var) + +#define SVAL_DECL(type, var) \ + static __GlobalVal< type > var +#define SVAL_IMPL(type, cls, var) \ + __GlobalVal< type > cls::var(&g_dacGlobals.cls##__##var) +#define SVAL_IMPL_INIT(type, cls, var, init) \ + __GlobalVal< type > cls::var(&g_dacGlobals.cls##__##var) +#define SVAL_IMPL_NS(type, ns, cls, var) \ + __GlobalVal< type > cls::var(&g_dacGlobals.ns##__##cls##__##var) +#define SVAL_IMPL_NS_INIT(type, ns, cls, var, init) \ + __GlobalVal< type > cls::var(&g_dacGlobals.ns##__##cls##__##var) + +#define GVAL_DECL(type, var) \ + extern __GlobalVal< type > var +#define GVAL_IMPL(type, var) \ + __GlobalVal< type > var(&g_dacGlobals.dac__##var) +#define GVAL_IMPL_INIT(type, var, init) \ + __GlobalVal< type > var(&g_dacGlobals.dac__##var) + +#define GARY_DECL(type, var, size) \ + extern __GlobalArray< type, size > var +#define GARY_IMPL(type, var, size) \ + __GlobalArray< type, size > var(&g_dacGlobals.dac__##var) + +// Translation from a host pointer back to the target address +// that was used to retrieve the data for the host pointer. 
+#define PTR_HOST_TO_TADDR(host) DacGetTargetAddrForHostAddr(host, true) + +// Translation from a host interior pointer back to the corresponding +// target address. The host address must reside within a previously +// retrieved instance. +#define PTR_HOST_INT_TO_TADDR(host) DacGetTargetAddrForHostInteriorAddr(host, true) + +// Construct a pointer to a member of the given type. +#define PTR_HOST_MEMBER_TADDR(type, host, memb) \ + (PTR_HOST_TO_TADDR(host) + (TADDR)offsetof(type, memb)) + +// in the DAC build this is still typed TADDR, but in the runtime +// build it preserves the member type. +#define PTR_HOST_MEMBER(type, host, memb) \ + (PTR_HOST_TO_TADDR(host) + (TADDR)offsetof(type, memb)) + +// Construct a pointer to a member of the given type given an interior +// host address. +#define PTR_HOST_INT_MEMBER_TADDR(type, host, memb) \ + (PTR_HOST_INT_TO_TADDR(host) + (TADDR)offsetof(type, memb)) + +#define PTR_TO_MEMBER_TADDR(type, ptr, memb) \ + (PTR_TO_TADDR(ptr) + (TADDR)offsetof(type, memb)) + +// in the DAC build this is still typed TADDR, but in the runtime +// build it preserves the member type. +#define PTR_TO_MEMBER(type, ptr, memb) \ + (PTR_TO_TADDR(ptr) + (TADDR)offsetof(type, memb)) + +// Constructs an arbitrary data instance for a piece of +// memory in the target. +#define PTR_READ(addr, size) \ + DacInstantiateTypeByAddress(addr, size, true) + +// This value is used to initialize target pointers to NULL. We want this to be TADDR type +// (as opposed to, say, __TPtrBase) so that it can be used in the non-explicit ctor overloads, +// eg. as an argument default value. +// We can't always just use NULL because that's 0 which (in C++) can be any integer or pointer +// type (causing an ambiguous overload compiler error when used in explicit ctor forms). +#define PTR_NULL ((TADDR)0) + +// Provides an empty method implementation when compiled +// for DACCESS_COMPILE. For example, use to stub out methods needed +// for vtable entries but otherwise unused. +// Note that these functions are explicitly NOT marked SUPPORTS_DAC so that we'll get a +// DacCop warning if any calls to them are detected. +// @dbgtodo rbyers: It's probably almost always wrong to call any such function, so +// we should probably throw a better error (DacNotImpl), and ideally mark the function +// DECLSPEC_NORETURN so we don't have to deal with fabricating return values and we can +// get compiler warnings (unreachable code) anytime functions marked this way are called. +#define DAC_EMPTY() { LEAF_CONTRACT; } +#define DAC_EMPTY_ERR() { LEAF_CONTRACT; DacError(E_UNEXPECTED); } +#define DAC_EMPTY_RET(retVal) { LEAF_CONTRACT; DacError(E_UNEXPECTED); return retVal; } +#define DAC_UNEXPECTED() { LEAF_CONTRACT; DacError_NoRet(E_UNEXPECTED); } + +#endif // __cplusplus + +HRESULT DacGetTargetAddrForHostAddr(const void* ptr, TADDR * pTADDR); + +// Implementation details for dac_cast, should never be accessed directly. +// See code:dac_cast for details and discussion. +namespace dac_imp +{ + //--------------------------------------------- + // Conversion to TADDR + + // Forward declarations. + template + struct conversionHelper; + + template + TADDR getTaddr(T&& val); + + // Helper structs to get the target address of specific types + + // This non-specialized struct handles all instances of asTADDR that don't + // take partially-specialized arguments. 
+ template + struct conversionHelper + { + inline static TADDR asTADDR(__TPtrBase const & tptr) + { return PTR_TO_TADDR(tptr); } + + inline static TADDR asTADDR(TADDR addr) + { return addr; } + }; + + // Handles + template + struct conversionHelper + { + inline static TADDR asTADDR(TypeT * src) + { + TADDR addr = 0; + if (DacGetTargetAddrForHostAddr(src, &addr) != S_OK) + addr = DacGetTargetAddrForHostInteriorAddr(src, true); + return addr; + } + }; + + template + struct conversionHelper<__GlobalPtr const & > + { + inline static TADDR asTADDR(__GlobalPtr const & gptr) + { return PTR_TO_TADDR(gptr); } + }; + + // It is an error to try dac_cast on a __GlobalVal or a __GlobalArray. + template + struct conversionHelper< __GlobalVal const & > + { + inline static TADDR asTADDR(__GlobalVal const & gval) + { static_assert(false, "Cannot use dac_cast on a __GlobalVal; first you must get its address using the '&' operator."); } + }; + + template + struct conversionHelper< __GlobalArray const & > + { + inline static TADDR asTADDR(__GlobalArray const & garr) + { static_assert(false, "Cannot use dac_cast on a __GlobalArray; first you must get its address using the '&' operator."); } + }; + + // This is the main helper function, and it delegates to the above helper functions. + // NOTE: this works because of C++0x reference collapsing rules for rvalue reference + // arguments in template functions. + template + TADDR getTaddr(T&& val) + { return conversionHelper::asTADDR(val); } + + //--------------------------------------------- + // Conversion to DAC instance + + // Helper class to instantiate DAC instances from a TADDR + // The default implementation assumes we want to create an instance of a PTR type + template + struct makeDacInst + { + // First constructing a __TPtrBase and then constructing the target type + // ensures that the target type can construct itself from a __TPtrBase. + // This also prevents unknown user conversions from producing incorrect + // results (since __TPtrBase can only be constructed from TADDR values). + static inline T fromTaddr(TADDR addr) + { return T(__TPtrBase(addr)); } + }; + + // Specialization for creating TADDRs from TADDRs. + template<> struct makeDacInst + { + static inline TADDR fromTaddr(TADDR addr) { return addr; } + }; + + // Partial specialization for creating host instances. + template + struct makeDacInst + { + static inline T * fromTaddr(TADDR addr) + { return makeDacInst::fromTaddr(addr); } + }; + + /* + struct Yes { char c[2]; }; + struct No { char c; }; + Yes& HasTPtrBase(__TPtrBase const *, ); + No& HasTPtrBase(...); + + template + typename rh::std::enable_if< + sizeof(HasTPtrBase(typename rh::std::remove_reference::type *)) == sizeof(Yes), + T>::type + makeDacInst(TADDR addr) + */ + +} // namespace dac_imp + +// DacCop in-line exclusion mechanism + +// Warnings - official home is DacCop\Shared\Warnings.cs, but we want a way for users to indicate +// warning codes in a way that is descriptive to readers (not just code numbers). 
The names here +// don't matter - DacCop just looks at the value +enum DacCopWarningCode +{ + // General Rules + FieldAccess = 1, + PointerArith = 2, + PointerComparison = 3, + InconsistentMarshalling = 4, + CastBetweenAddressSpaces = 5, + CastOfMarshalledType = 6, + VirtualCallToNonVPtr = 7, + UndacizedGlobalVariable = 8, + + // Function graph related + CallUnknown = 701, + CallNonDac = 702, + CallVirtualUnknown = 704, + CallVirtualNonDac = 705, +}; + +// DACCOP_IGNORE is a mechanism to suppress DacCop violations from within the source-code. +// See the DacCop wiki for guidance on how best to use this: http://mswikis/clr/dev/Pages/DacCop.aspx +// +// DACCOP_IGNORE will suppress a DacCop violation for the following (non-compound) statement. +// For example: +// // The "dual-mode DAC problem" occurs in a few places where a class is used both +// // in the host, and marshalled from the target ... +// DACCOP_IGNORE(CastBetweenAddressSpaces,"SBuffer has the dual-mode DAC problem"); +// TADDR bufAddr = (TADDR)m_buffer; +// +// A call to DACCOP_IGNORE must occur as it's own statement, and can apply only to following +// single-statements (not to compound statement blocks). Occasionally it is necessary to hoist +// violation-inducing code out to its own statement (e.g., if it occurs in the conditional of an +// if). +// +// Arguments: +// code: a literal value from DacCopWarningCode indicating which violation should be suppressed. +// szReasonString: a short description of why this exclusion is necessary. This is intended just +// to help readers of the code understand the source of the problem, and what would be required +// to fix it. More details can be provided in comments if desired. +// +inline void DACCOP_IGNORE(DacCopWarningCode code, const char * szReasonString) +{ + UNREFERENCED_PARAMETER(code); + UNREFERENCED_PARAMETER(szReasonString); + // DacCop detects calls to this function. No implementation is necessary. +} + +#else // !DACCESS_COMPILE + +// +// This version of the macros turns into normal pointers +// for unmodified in-proc compilation. + +// ******************************************************* +// !!!!!!!!!!!!!!!!!!!!!!!!!NOTE!!!!!!!!!!!!!!!!!!!!!!!!!! +// +// Please search this file for the type name to find the +// DAC versions of these definitions +// +// !!!!!!!!!!!!!!!!!!!!!!!!!NOTE!!!!!!!!!!!!!!!!!!!!!!!!!! +// ******************************************************* + +// Declare TADDR as a non-pointer type so that arithmetic +// can be done on it directly, as with the DACCESS_COMPILE definition. +// This also helps expose pointer usage that may need to be changed. 
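// For example (illustrative): in this branch a statement such as
//     TADDR ta = PTR_HOST_TO_TADDR(pObject);
// is just a reinterpret_cast of the in-process pointer, and DPTR(MyType) collapses to a
// plain MyType*, so the same source compiles unchanged when DACCESS_COMPILE is not defined.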
+typedef UIntNative TADDR; + +typedef void* PTR_VOID; +typedef void** PTR_PTR_VOID; + +#define DPTR(type) type* +#define ArrayDPTR(type) type* +#define SPTR(type) type* +#define VPTR(type) type* +#define S8PTR(type) type* +#define S8PTRMAX(type, maxChars) type* +#define S16PTR(type) type* +#define S16PTRMAX(type, maxChars) type* + +#ifndef __GCENV_BASE_INCLUDED__ +#define PTR_TO_TADDR(ptr) (reinterpret_cast(ptr)) +#endif // __GCENV_BASE_INCLUDED__ +#define GFN_TADDR(name) (reinterpret_cast(&(name))) + +#define GVAL_ADDR(g) (&(g)) +#define _SPTR_DECL(acc_type, store_type, var) \ + static store_type var +#define _SPTR_IMPL(acc_type, store_type, cls, var) \ + store_type cls::var +#define _SPTR_IMPL_INIT(acc_type, store_type, cls, var, init) \ + store_type cls::var = init +#define _SPTR_IMPL_NS(acc_type, store_type, ns, cls, var) \ + store_type cls::var +#define _SPTR_IMPL_NS_INIT(acc_type, store_type, ns, cls, var, init) \ + store_type cls::var = init +#define _GPTR_DECL(acc_type, store_type, var) \ + extern store_type var +#define _GPTR_IMPL(acc_type, store_type, var) \ + store_type var +#define _GPTR_IMPL_INIT(acc_type, store_type, var, init) \ + store_type var = init +#define SVAL_DECL(type, var) \ + static type var +#define SVAL_IMPL(type, cls, var) \ + type cls::var +#define SVAL_IMPL_INIT(type, cls, var, init) \ + type cls::var = init +#define SVAL_IMPL_NS(type, ns, cls, var) \ + type cls::var +#define SVAL_IMPL_NS_INIT(type, ns, cls, var, init) \ + type cls::var = init +#define GVAL_DECL(type, var) \ + extern type var +#define GVAL_IMPL(type, var) \ + type var +#define GVAL_IMPL_INIT(type, var, init) \ + type var = init +#define GARY_DECL(type, var, size) \ + extern type var[size] +#define GARY_IMPL(type, var, size) \ + type var[size] +#define PTR_HOST_TO_TADDR(host) (reinterpret_cast(host)) +#define PTR_HOST_INT_TO_TADDR(host) ((TADDR)(host)) +#define VPTR_HOST_VTABLE_TO_TADDR(host) (reinterpret_cast(host)) +#define PTR_HOST_MEMBER_TADDR(type, host, memb) (reinterpret_cast(&(host)->memb)) +#define PTR_HOST_MEMBER(type, host, memb) (&((host)->memb)) +#define PTR_HOST_INT_MEMBER_TADDR(type, host, memb) ((TADDR)&(host)->memb) +#define PTR_TO_MEMBER_TADDR(type, ptr, memb) (reinterpret_cast(&((ptr)->memb))) +#define PTR_TO_MEMBER(type, ptr, memb) (&((ptr)->memb)) +#define PTR_READ(addr, size) (reinterpret_cast(addr)) + +#define PTR_NULL NULL + +#define DAC_EMPTY() +#define DAC_EMPTY_ERR() +#define DAC_EMPTY_RET(retVal) +#define DAC_UNEXPECTED() + +#define DACCOP_IGNORE(warningCode, reasonString) + +#endif // !DACCESS_COMPILE + +//---------------------------------------------------------------------------- +// dac_cast +// Casting utility, to be used for casting one class pointer type to another. +// Use as you would use static_cast +// +// dac_cast is designed to act just as static_cast does when +// dealing with pointers and their DAC abstractions. Specifically, +// it handles these coversions: +// +// dac_cast(SourceTypeVal) +// +// where TargetType <- SourceTypeVal are +// +// ?PTR(Tgt) <- TADDR - Create PTR type (DPtr etc.) from TADDR +// ?PTR(Tgt) <- ?PTR(Src) - Convert one PTR type to another +// ?PTR(Tgt) <- Src * - Create PTR type from dac host object instance +// TADDR <- ?PTR(Src) - Get TADDR of PTR object (DPtr etc.) +// TADDR <- Src * - Get TADDR of dac host object instance +// +// Note that there is no direct convertion to other host-pointer types (because we don't +// know if you want a DPTR or VPTR etc.). 
However, due to the implicit DAC conversions, +// you can just use dac_cast and assign that to a Foo*. +// +// The beauty of this syntax is that it is consistent regardless +// of source and target casting types. You just use dac_cast +// and the partial template specialization will do the right thing. +// +// One important thing to realise is that all "Foo *" types are +// assumed to be pointers to host instances that were marshalled by DAC. This should +// fail at runtime if it's not the case. +// +// Some examples would be: +// +// - Host pointer of one type to a related host pointer of another +// type, i.e., MethodDesc * <-> InstantiatedMethodDesc * +// Syntax: with MethodDesc *pMD, InstantiatedMethodDesc *pInstMD +// pInstMd = dac_cast(pMD) +// pMD = dac_cast(pInstMD) +// +// - (D|V)PTR of one encapsulated pointer type to a (D|V)PTR of +// another type, i.e., PTR_AppDomain <-> PTR_BaseDomain +// Syntax: with PTR_AppDomain pAD, PTR_BaseDomain pBD +// dac_cast(pBD) +// dac_cast(pAD) +// +// Example comparsions of some old and new syntax, where +// h is a host pointer, such as "Foo *h;" +// p is a DPTR, such as "PTR_Foo p;" +// +// PTR_HOST_TO_TADDR(h) ==> dac_cast(h) +// PTR_TO_TADDR(p) ==> dac_cast(p) +// PTR_Foo(PTR_HOST_TO_TADDR(h)) ==> dac_cast(h) +// +//---------------------------------------------------------------------------- +template +inline Tgt dac_cast(Src src) +{ +#ifdef DACCESS_COMPILE + // In DAC builds, first get a TADDR for the source, then create the + // appropriate destination instance. + TADDR addr = dac_imp::getTaddr(src); + return dac_imp::makeDacInst::fromTaddr(addr); +#else // !DACCESS_COMPILE + // In non-DAC builds, dac_cast is the same as a C-style cast because we need to support: + // - casting away const + // - conversions between pointers and TADDR + // Perhaps we should more precisely restrict it's usage, but we get the precise + // restrictions in DAC builds, so it wouldn't buy us much. + return (Tgt)(src); +#endif // !DACCESS_COMPILE +} + +//---------------------------------------------------------------------------- +// +// Convenience macros which work for either mode. +// +//---------------------------------------------------------------------------- + +#define SPTR_DECL(type, var) _SPTR_DECL(type*, PTR_##type, var) +#define SPTR_IMPL(type, cls, var) _SPTR_IMPL(type*, PTR_##type, cls, var) +#define SPTR_IMPL_INIT(type, cls, var, init) _SPTR_IMPL_INIT(type*, PTR_##type, cls, var, init) +#define SPTR_IMPL_NS(type, ns, cls, var) _SPTR_IMPL_NS(type*, PTR_##type, ns, cls, var) +#define SPTR_IMPL_NS_INIT(type, ns, cls, var, init) _SPTR_IMPL_NS_INIT(type*, PTR_##type, ns, cls, var, init) +#define GPTR_DECL(type, var) _GPTR_DECL(type*, PTR_##type, var) +#define GPTR_IMPL(type, var) _GPTR_IMPL(type*, PTR_##type, var) +#define GPTR_IMPL_INIT(type, var, init) _GPTR_IMPL_INIT(type*, PTR_##type, var, init) + +// If you want to marshal a single instance of an ArrayDPtr over to the host and +// return a pointer to it, you can use this function. However, this is unsafe because +// users of value may assume they can do pointer arithmetic on it. This is exactly +// the bugs ArrayDPtr is designed to prevent. See code:__ArrayDPtr for details. 
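// For example (illustrative): with ArrayDPTR(UInt8) pCode, a caller that truly needs only
// one marshalled element can write
//     UInt8 * pFirst = DacUnsafeMarshalSingleElement(pCode);
// and must not index or otherwise do pointer arithmetic on pFirst afterwards.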
+template +inline type* DacUnsafeMarshalSingleElement( ArrayDPTR(type) arrayPtr ) +{ + return (DPTR(type))(arrayPtr); +} + +typedef DPTR(Int8) PTR_Int8; +typedef DPTR(Int16) PTR_Int16; +typedef DPTR(Int32) PTR_Int32; +typedef DPTR(Int64) PTR_Int64; +typedef ArrayDPTR(UInt8) PTR_UInt8; +typedef DPTR(PTR_UInt8) PTR_PTR_UInt8; +typedef DPTR(PTR_PTR_UInt8) PTR_PTR_PTR_UInt8; +typedef DPTR(UInt16) PTR_UInt16; +typedef DPTR(UInt32) PTR_UInt32; +typedef DPTR(UInt64) PTR_UInt64; +typedef DPTR(UIntNative) PTR_UIntNative; + +typedef DPTR(size_t) PTR_size_t; + +typedef UInt8 Code; +typedef DPTR(Code) PTR_Code; +typedef DPTR(PTR_Code) PTR_PTR_Code; + +#if defined(DACCESS_COMPILE) && defined(DAC_CLR_ENVIRONMENT) +#include +#include +//#include +#endif // defined(DACCESS_COMPILE) && defined(DAC_CLR_ENVIRONMENT) + +//---------------------------------------------------------------------------- +// PCODE is pointer to any executable code. +typedef TADDR PCODE; +typedef DPTR(TADDR) PTR_PCODE; + +//---------------------------------------------------------------------------- +// +// The access code compile must compile data structures that exactly +// match the real structures for access to work. The access code +// doesn't want all of the debugging validation code, though, so +// distinguish between _DEBUG, for declaring general debugging data +// and always-on debug code, and _DEBUG_IMPL, for debugging code +// which will be disabled when compiling for external access. +// +//---------------------------------------------------------------------------- + +#if !defined(_DEBUG_IMPL) && defined(_DEBUG) && !defined(DACCESS_COMPILE) +#define _DEBUG_IMPL 1 +#endif + +// Helper macro for tracking EnumMemoryRegions progress. +#if 0 +#define EMEM_OUT(args) DacWarning args +#else // !0 +#define EMEM_OUT(args) +#endif // !0 + +// TARGET_CONSISTENCY_CHECK represents a condition that should not fail unless the DAC target is corrupt. +// This is in contrast to ASSERTs in DAC infrastructure code which shouldn't fail regardless of the memory +// read from the target. At the moment we treat these the same, but in the future we will want a mechanism +// for disabling just the target consistency checks (eg. for tests that intentionally use corrupted targets). +// @dbgtodo rbyers: Separating asserts and target consistency checks is tracked by DevDiv Bugs 31674 +#define TARGET_CONSISTENCY_CHECK(expr,msg) _ASSERTE_MSG(expr,msg) + +#ifdef DACCESS_COMPILE +#define NO_DAC() static_assert(false, "Cannot use this method in builds DAC: " __FILE__ ":" __LINE__) +#else +#define NO_DAC() do {} while (0) +#endif + +#endif // !__daccess_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/eetype.h b/src/coreclr/src/nativeaot/Runtime/inc/eetype.h new file mode 100644 index 0000000000000..35e4a64294bc8 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/eetype.h @@ -0,0 +1,421 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// Fundamental runtime type representation + +#pragma warning(push) +#pragma warning(disable:4200) // nonstandard extension used : zero-sized array in struct/union +//------------------------------------------------------------------------------------------------- +// Forward declarations + +class EEType; +class OptionalFields; +class TypeManager; +struct TypeManagerHandle; +class DynamicModule; +struct EETypeRef; + +#if !defined(USE_PORTABLE_HELPERS) +#define SUPPORTS_WRITABLE_DATA 1 +#endif + +//------------------------------------------------------------------------------------------------- +// Array of these represents the interfaces implemented by a type + +class EEInterfaceInfo +{ + public: + EEType * GetInterfaceEEType() + { + return ((UIntTarget)m_pInterfaceEEType & ((UIntTarget)1)) ? + *(EEType**)((UIntTarget)m_ppInterfaceEETypeViaIAT & ~((UIntTarget)1)) : + m_pInterfaceEEType; + } + + private: + union + { + EEType * m_pInterfaceEEType; // m_uFlags == InterfaceFlagNormal + EEType ** m_ppInterfaceEETypeViaIAT; // m_uFlags == InterfaceViaIATFlag + }; +}; + +//------------------------------------------------------------------------------------------------- +// The subset of TypeFlags that Redhawk knows about at runtime +// This should match the TypeFlags enum in the managed type system. +enum EETypeElementType : UInt8 +{ + // Primitive + ElementType_Unknown = 0x00, + ElementType_Void = 0x01, + ElementType_Boolean = 0x02, + ElementType_Char = 0x03, + ElementType_SByte = 0x04, + ElementType_Byte = 0x05, + ElementType_Int16 = 0x06, + ElementType_UInt16 = 0x07, + ElementType_Int32 = 0x08, + ElementType_UInt32 = 0x09, + ElementType_Int64 = 0x0A, + ElementType_UInt64 = 0x0B, + ElementType_IntPtr = 0x0C, + ElementType_UIntPtr = 0x0D, + ElementType_Single = 0x0E, + ElementType_Double = 0x0F, + + ElementType_ValueType = 0x10, + // Enum = 0x11, // EETypes store enums as their underlying type + ElementType_Nullable = 0x12, + // Unused 0x13, + + ElementType_Class = 0x14, + ElementType_Interface = 0x15, + + ElementType_SystemArray = 0x16, // System.Array type + + ElementType_Array = 0x17, + ElementType_SzArray = 0x18, + ElementType_ByRef = 0x19, + ElementType_Pointer = 0x1A, +}; + +//------------------------------------------------------------------------------------------------- +// Support for encapsulating the location of fields in the EEType that have variable offsets or may be +// optional. +// +// The following enumaration gives symbolic names for these fields and is used with the GetFieldPointer() and +// GetFieldOffset() APIs. 
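The EEInterfaceInfo accessor above relies on a low-bit tag: when bit 0 of the stored pointer-sized value is set, the value is really the tagged address of an IAT slot and one more dereference is needed. A standalone sketch of that trick, with stand-in types rather than the real EEType:

#include <cstdint>

struct FakeEEType { int id; };   // stand-in for EEType

static FakeEEType* ResolveMaybeIndirect(uintptr_t slot)
{
    if (slot & 1)                                        // low bit set: via-IAT case
        return *(FakeEEType**)(slot & ~(uintptr_t)1);    // strip the tag, dereference once more
    return (FakeEEType*)slot;                            // untagged: direct pointer
}

int main()
{
    FakeEEType t = { 7 };
    FakeEEType* pSlot = &t;                          // simulated IAT slot
    uintptr_t direct   = (uintptr_t)&t;
    uintptr_t indirect = (uintptr_t)&pSlot | 1;      // tag the indirect form
    return (ResolveMaybeIndirect(direct)->id == 7 &&
            ResolveMaybeIndirect(indirect)->id == 7) ? 0 : 1;
}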
+enum EETypeField +{ + ETF_InterfaceMap, + ETF_TypeManagerIndirection, + ETF_WritableData, + ETF_Finalizer, + ETF_OptionalFieldsPtr, + ETF_SealedVirtualSlots, + ETF_DynamicTemplateType, + ETF_DynamicDispatchMap, + ETF_DynamicModule, + ETF_GenericDefinition, + ETF_GenericComposition, + ETF_DynamicGcStatics, + ETF_DynamicNonGcStatics, + ETF_DynamicThreadStaticOffset, +}; + +//------------------------------------------------------------------------------------------------- +// Fundamental runtime type representation +typedef DPTR(class EEType) PTR_EEType; +typedef DPTR(PTR_EEType) PTR_PTR_EEType; +typedef DPTR(class OptionalFields) PTR_OptionalFields; +typedef DPTR(PTR_OptionalFields) PTR_PTR_OptionalFields; + +class EEType +{ + friend class AsmOffsets; + +private: + struct RelatedTypeUnion + { + union + { + // Kinds.CanonicalEEType + EEType* m_pBaseType; + EEType** m_ppBaseTypeViaIAT; + + // Kinds.ClonedEEType + EEType** m_pCanonicalType; + EEType** m_ppCanonicalTypeViaIAT; + + // Kinds.ParameterizedEEType + EEType* m_pRelatedParameterType; + EEType** m_ppRelatedParameterTypeViaIAT; + }; + }; + + UInt16 m_usComponentSize; + UInt16 m_usFlags; + UInt32 m_uBaseSize; + RelatedTypeUnion m_RelatedType; + UInt16 m_usNumVtableSlots; + UInt16 m_usNumInterfaces; + UInt32 m_uHashCode; + + TgtPTR_Void m_VTable[]; // make this explicit so the binder gets the right alignment + + // after the m_usNumVtableSlots vtable slots, we have m_usNumInterfaces slots of + // EEInterfaceInfo, and after that a couple of additional pointers based on whether the type is + // finalizable (the address of the finalizer code) or has optional fields (pointer to the compacted + // fields). + + enum Flags + { + // There are four kinds of EETypes, the three of them regular types that use the full EEType encoding + // plus a fourth kind used as a grab bag of unusual edge cases which are encoded in a smaller, + // simplified version of EEType. See LimitedEEType definition below. + EETypeKindMask = 0x0003, + + // This flag is set when m_pRelatedType is in a different module. In that case, m_pRelatedType + // actually points to a 'fake' EEType whose m_pRelatedType field lines up with an IAT slot in this + // module, which then points to the desired EEType. In other words, there is an extra indirection + // through m_pRelatedType to get to the related type in the other module. + RelatedTypeViaIATFlag = 0x0004, + + IsDynamicTypeFlag = 0x0008, + + // This EEType represents a type which requires finalization + HasFinalizerFlag = 0x0010, + + // This type contain gc pointers + HasPointersFlag = 0x0020, + + // This type is generic and one or more of it's type parameters is co- or contra-variant. This only + // applies to interface and delegate types. + GenericVarianceFlag = 0x0080, + + // This type has optional fields present. + OptionalFieldsFlag = 0x0100, + + // Unused = 0x0200, + + // This type is generic. + IsGenericFlag = 0x0400, + + // We are storing a EETypeElementType in the upper bits for unboxing enums + ElementTypeMask = 0xf800, + ElementTypeShift = 11, + }; + +public: + + // These are flag values that are rarely set for types. If any of them are set then an optional field will + // be associated with the EEType to represent them. + enum RareFlags + { + // This type requires 8-byte alignment for its fields on certain platforms (only ARM currently). 
+ RequiresAlign8Flag = 0x00000001, + + // Old unused flag + UNUSED1 = 0x00000002, + + // unused = 0x00000004, + + // unused = 0x00000008, + + // unused = 0x00000010, + + // This EEType has a Class Constructor + HasCctorFlag = 0x0000020, + + // Old unused flag + UNUSED2 = 0x00000040, + + // This EEType was constructed from a universal canonical template, and has + // its own dynamically created DispatchMap (does not use the DispatchMap of its template type) + HasDynamicallyAllocatedDispatchMapFlag = 0x00000080, + + // This EEType represents a structure that is an HFA (only ARM currently) + IsHFAFlag = 0x00000100, + + // This EEType has sealed vtable entries + HasSealedVTableEntriesFlag = 0x00000200, + + // This dynamically created type has gc statics + IsDynamicTypeWithGcStaticsFlag = 0x00000400, + + // This dynamically created type has non gc statics + IsDynamicTypeWithNonGcStaticsFlag = 0x00000800, + + // This dynamically created type has thread statics + IsDynamicTypeWithThreadStaticsFlag = 0x00001000, + + // This EEType was constructed from a module where the open type is defined in + // a dynamically loaded type + HasDynamicModuleFlag = 0x00002000, + + // This EEType is for an abstract (but non-interface) type + IsAbstractClassFlag = 0x00004000, + + // This EEType is for a Byref-like class (TypedReference, Span<T>,...) + IsByRefLikeFlag = 0x00008000, + }; + +public: + + enum Kinds + { + CanonicalEEType = 0x0000, + ClonedEEType = 0x0001, + ParameterizedEEType = 0x0002, + GenericTypeDefEEType = 0x0003, + }; + + UInt32 get_BaseSize() + { return m_uBaseSize; } + + UInt16 get_ComponentSize() + { return m_usComponentSize; } + + PTR_Code get_Slot(UInt16 slotNumber); + + PTR_PTR_Code get_SlotPtr(UInt16 slotNumber); + + Kinds get_Kind(); + + bool IsCloned() + { return get_Kind() == ClonedEEType; } + + bool IsRelatedTypeViaIAT() + { return ((m_usFlags & (UInt16)RelatedTypeViaIATFlag) != 0); } + + bool IsArray() + { + EETypeElementType elementType = GetElementType(); + return elementType == ElementType_Array || elementType == ElementType_SzArray; + } + + bool IsParameterizedType() + { return (get_Kind() == ParameterizedEEType); } + + bool IsGenericTypeDefinition() + { return (get_Kind() == GenericTypeDefEEType); } + + bool IsCanonical() + { return get_Kind() == CanonicalEEType; } + + bool IsInterface() + { return GetElementType() == ElementType_Interface; } + + EEType * get_CanonicalEEType(); + + EEType * get_RelatedParameterType(); + + // A parameterized type shape less than SZARRAY_BASE_SIZE indicates that this is not + // an array but some other parameterized type (see: ParameterizedTypeShapeConstants) + // For arrays, this number uniquely captures both Sz/Md array flavor and rank. 
+ UInt32 get_ParameterizedTypeShape() { return m_uBaseSize; } + + bool get_IsValueType() + { return GetElementType() < ElementType_Class; } + + bool HasFinalizer() + { + return (m_usFlags & HasFinalizerFlag) != 0; + } + + bool HasReferenceFields() + { + return (m_usFlags & HasPointersFlag) != 0; + } + + bool HasOptionalFields() + { + return (m_usFlags & OptionalFieldsFlag) != 0; + } + + bool IsEquivalentTo(EEType * pOtherEEType) + { + if (this == pOtherEEType) + return true; + + EEType * pThisEEType = this; + + if (pThisEEType->IsCloned()) + pThisEEType = pThisEEType->get_CanonicalEEType(); + + if (pOtherEEType->IsCloned()) + pOtherEEType = pOtherEEType->get_CanonicalEEType(); + + if (pThisEEType == pOtherEEType) + return true; + + if (pThisEEType->IsParameterizedType() && pOtherEEType->IsParameterizedType()) + { + return pThisEEType->get_RelatedParameterType()->IsEquivalentTo(pOtherEEType->get_RelatedParameterType()) && + pThisEEType->get_ParameterizedTypeShape() == pOtherEEType->get_ParameterizedTypeShape(); + } + + return false; + } + + // How many vtable slots are there? + UInt16 GetNumVtableSlots() + { return m_usNumVtableSlots; } + + // How many entries are in the interface map after the vtable slots? + UInt16 GetNumInterfaces() + { return m_usNumInterfaces; } + + // Does this class (or its base classes) implement any interfaces? + bool HasInterfaces() + { return GetNumInterfaces() != 0; } + + bool IsGeneric() + { return (m_usFlags & IsGenericFlag) != 0; } + + DynamicModule* get_DynamicModule(); + + TypeManagerHandle* GetTypeManagerPtr(); + + // Used only by GC initialization, this initializes the EEType used to mark free entries in the GC heap. + // It should be an array type with a component size of one (so the GC can easily size it as appropriate) + // and should be marked as not containing any references. The rest of the fields don't matter: the GC does + // not query them and the rest of the runtime will never hold a reference to free object. + inline void InitializeAsGcFreeType(); + +#ifdef DACCESS_COMPILE + bool DacVerify(); + static bool DacVerifyWorker(EEType* pThis); +#endif // DACCESS_COMPILE + + // Mark or determine that a type is generic and one or more of it's type parameters is co- or + // contra-variant. This only applies to interface and delegate types. + bool HasGenericVariance() + { return (m_usFlags & GenericVarianceFlag) != 0; } + + EETypeElementType GetElementType() + { return (EETypeElementType)((m_usFlags & ElementTypeMask) >> ElementTypeShift); } + + // Determine whether a type requires 8-byte alignment for its fields (required only on certain platforms, + // only ARM so far). + bool RequiresAlign8() + { return (get_RareFlags() & RequiresAlign8Flag) != 0; } + + // Determine whether a type is an instantiation of Nullable. + bool IsNullable() + { return GetElementType() == ElementType_Nullable; } + + // Determine whether a type was created by dynamic type loader + bool IsDynamicType() + { return (m_usFlags & IsDynamicTypeFlag) != 0; } + + UInt32 GetHashCode(); + + // Retrieve optional fields associated with this EEType. May be NULL if no such fields exist. + inline PTR_OptionalFields get_OptionalFields(); + + // Get flags that are less commonly set on EETypes. + inline UInt32 get_RareFlags(); + + // Helper methods that deal with EEType topology (size and field layout). These are useful since as we + // optimize for pay-for-play we increasingly want to customize exactly what goes into an EEType on a + // per-type basis. 
The rules that govern this can be both complex and volatile and we risk sprinkling + // various layout rules through the binder and runtime that obscure the basic meaning of the code and are + // brittle: easy to overlook when one of the rules changes. + // + // The following methods can in some cases have fairly complex argument lists of their own and in that way + // they expose more of the implementation details than we'd ideally like. But regardless they still serve + // an arguably more useful purpose: they identify all the places that rely on the EEType layout. As we + // change layout rules we might have to change the arguments to the methods below but in doing so we will + // instantly identify all the other parts of the binder and runtime that need to be updated. + + // Calculate the offset of a field of the EEType that has a variable offset. + inline UInt32 GetFieldOffset(EETypeField eField); + + // Validate an EEType extracted from an object. + bool Validate(bool assertOnFail = true); +}; + +#pragma warning(pop) + +#include "OptionalFields.h" diff --git a/src/coreclr/src/nativeaot/Runtime/inc/eetype.inl b/src/coreclr/src/nativeaot/Runtime/inc/eetype.inl new file mode 100644 index 0000000000000..c3549c0d8caef --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/eetype.inl @@ -0,0 +1,272 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __eetype_inl__ +#define __eetype_inl__ +//----------------------------------------------------------------------------------------------------------- +inline UInt32 EEType::GetHashCode() +{ + return m_uHashCode; +} + +//----------------------------------------------------------------------------------------------------------- +inline PTR_Code EEType::get_Slot(UInt16 slotNumber) +{ + ASSERT(slotNumber < m_usNumVtableSlots); + return *get_SlotPtr(slotNumber); +} + +//----------------------------------------------------------------------------------------------------------- +inline PTR_PTR_Code EEType::get_SlotPtr(UInt16 slotNumber) +{ + ASSERT(slotNumber < m_usNumVtableSlots); + return dac_cast(dac_cast(this) + offsetof(EEType, m_VTable)) + slotNumber; +} + +#ifdef DACCESS_COMPILE +inline bool EEType::DacVerify() +{ + // Use a separate static worker because the worker validates + // the whole chain of EETypes and we don't want to accidentally + // answer questions from 'this' that should have come from the + // 'current' EEType. + return DacVerifyWorker(this); +} +// static +inline bool EEType::DacVerifyWorker(EEType* pThis) +{ + //********************************************************************* + //**** ASSUMES MAX TYPE HIERARCHY DEPTH OF 1024 TYPES **** + //********************************************************************* + const int MAX_SANE_RELATED_TYPES = 1024; + //********************************************************************* + //**** ASSUMES MAX OF 200 INTERFACES IMPLEMENTED ON ANY GIVEN TYPE **** + //********************************************************************* + const int MAX_SANE_NUM_INSTANCES = 200; + + + PTR_EEType pCurrentType = dac_cast(pThis); + for (int i = 0; i < MAX_SANE_RELATED_TYPES; i++) + { + // Verify interface map + if (pCurrentType->GetNumInterfaces() > MAX_SANE_NUM_INSTANCES) + return false; + + // Validate the current type + if (!pCurrentType->Validate(false)) + return false; + + // + // Now on to the next type in the hierarchy. 
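The MAX_SANE_RELATED_TYPES cap in the loop above is what keeps a corrupt or cyclic base-type chain from hanging the debugger. The same defensive pattern, reduced to a generic parent-linked structure (illustrative Node type, not the real EEType chain):

struct Node { Node* parent; };

static bool ChainLooksSane(const Node* n, int maxDepth = 1024)
{
    for (int i = 0; i < maxDepth && n != nullptr; i++)
        n = n->parent;
    return n == nullptr;   // did not reach the root within the cap: assume a loop or corruption
}

int main()
{
    Node a = { nullptr }, b = { &a }, loop = { nullptr };
    loop.parent = &loop;                               // simulated corruption: self-cycle
    return (ChainLooksSane(&b) && !ChainLooksSane(&loop)) ? 0 : 1;
}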
+ // + + if (pCurrentType->IsRelatedTypeViaIAT()) + pCurrentType = *dac_cast(reinterpret_cast(pCurrentType->m_RelatedType.m_ppBaseTypeViaIAT)); + else + pCurrentType = dac_cast(reinterpret_cast(pCurrentType->m_RelatedType.m_pBaseType)); + + if (pCurrentType == NULL) + break; + } + + if (pCurrentType != NULL) + return false; // assume we found an infinite loop + + return true; +} +#endif + +#if !defined(DACCESS_COMPILE) +inline PTR_UInt8 FollowRelativePointer(const Int32* pDist) +{ + Int32 dist = *pDist; + + PTR_UInt8 result = (PTR_UInt8)pDist + dist; + + return result; +} + +// Retrieve optional fields associated with this EEType. May be NULL if no such fields exist. +inline PTR_OptionalFields EEType::get_OptionalFields() +{ + if ((m_usFlags & OptionalFieldsFlag) == 0) + return NULL; + + UInt32 cbOptionalFieldsOffset = GetFieldOffset(ETF_OptionalFieldsPtr); + +#if !defined(USE_PORTABLE_HELPERS) + if (!IsDynamicType()) + { + return (OptionalFields*)FollowRelativePointer((Int32*)((UInt8*)this + cbOptionalFieldsOffset)); + } + else +#endif + { + return *(OptionalFields**)((UInt8*)this + cbOptionalFieldsOffset); + } +} + +// Get flags that are less commonly set on EETypes. +inline UInt32 EEType::get_RareFlags() +{ + OptionalFields * pOptFields = get_OptionalFields(); + + // If there are no optional fields then none of the rare flags have been set. + if (!pOptFields) + return 0; + + // Get the flags from the optional fields. The default is zero if that particular field was not included. + return pOptFields->GetRareFlags(0); +} + +inline TypeManagerHandle* EEType::GetTypeManagerPtr() +{ + UInt32 cbOffset = GetFieldOffset(ETF_TypeManagerIndirection); + +#if !defined(USE_PORTABLE_HELPERS) + if (!IsDynamicType()) + { + return (TypeManagerHandle*)FollowRelativePointer((Int32*)((UInt8*)this + cbOffset)); + } + else +#endif + { + return *(TypeManagerHandle**)((UInt8*)this + cbOffset); + } +} +#endif // !defined(DACCESS_COMPILE) + +// Calculate the offset of a field of the EEType that has a variable offset. +__forceinline UInt32 EEType::GetFieldOffset(EETypeField eField) +{ + // First part of EEType consists of the fixed portion followed by the vtable. + UInt32 cbOffset = offsetof(EEType, m_VTable) + (sizeof(UIntTarget) * m_usNumVtableSlots); + + // Then we have the interface map. + if (eField == ETF_InterfaceMap) + { + ASSERT(GetNumInterfaces() > 0); + return cbOffset; + } + cbOffset += sizeof(EEInterfaceInfo) * GetNumInterfaces(); + + const UInt32 relativeOrFullPointerOffset = +#if USE_PORTABLE_HELPERS + sizeof(UIntTarget); +#else + IsDynamicType() ? sizeof(UIntTarget) : sizeof(UInt32); +#endif + + // Followed by the type manager indirection cell. + if (eField == ETF_TypeManagerIndirection) + { + return cbOffset; + } + cbOffset += relativeOrFullPointerOffset; + +#if SUPPORTS_WRITABLE_DATA + // Followed by writable data. + if (eField == ETF_WritableData) + { + return cbOffset; + } + cbOffset += relativeOrFullPointerOffset; +#endif + + // Followed by the pointer to the finalizer method. + if (eField == ETF_Finalizer) + { + ASSERT(HasFinalizer()); + return cbOffset; + } + if (HasFinalizer()) + cbOffset += relativeOrFullPointerOffset; + + // Followed by the pointer to the optional fields. 
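FollowRelativePointer() above resolves an image-relative value: the stored 32-bit signed delta is added to the address of the delta itself. A minimal standalone model of that resolution (here the delta is computed at runtime purely for illustration; in a real image it is emitted at compile/link time, and the two statics are assumed to be close enough for a 32-bit delta):

#include <cstdint>
#include <cstdio>

static uint8_t* FollowRelative(const int32_t* pDelta)
{
    return (uint8_t*)pDelta + *pDelta;                 // target = address of delta + delta
}

int main()
{
    static uint8_t target = 0xAB;
    static int32_t delta;
    delta = (int32_t)((intptr_t)&target - (intptr_t)&delta);   // self-relative offset to 'target'
    printf("%02x\n", (unsigned)*FollowRelative(&delta));       // prints "ab"
    return 0;
}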
+ if (eField == ETF_OptionalFieldsPtr) + { + ASSERT(HasOptionalFields()); + return cbOffset; + } + if (HasOptionalFields()) + cbOffset += relativeOrFullPointerOffset; + + // Followed by the pointer to the sealed virtual slots + if (eField == ETF_SealedVirtualSlots) + return cbOffset; + + UInt32 rareFlags = get_RareFlags(); + + // in the case of sealed vtable entries on static types, we have a UInt sized relative pointer + if (rareFlags & HasSealedVTableEntriesFlag) + cbOffset += relativeOrFullPointerOffset; + + if (eField == ETF_DynamicDispatchMap) + { + ASSERT(IsDynamicType()); + return cbOffset; + } + if ((rareFlags & HasDynamicallyAllocatedDispatchMapFlag) != 0) + cbOffset += sizeof(UIntTarget); + + if (eField == ETF_GenericDefinition) + { + ASSERT(IsGeneric()); + return cbOffset; + } + if (IsGeneric()) + cbOffset += relativeOrFullPointerOffset; + + if (eField == ETF_GenericComposition) + { + ASSERT(IsGeneric()); + return cbOffset; + } + if (IsGeneric()) + cbOffset += relativeOrFullPointerOffset; + + if (eField == ETF_DynamicModule) + { + ASSERT((rareFlags & HasDynamicModuleFlag) != 0); + return cbOffset; + } + + if ((rareFlags & HasDynamicModuleFlag) != 0) + cbOffset += sizeof(UIntTarget); + + if (eField == ETF_DynamicTemplateType) + { + ASSERT(IsDynamicType()); + return cbOffset; + } + if (IsDynamicType()) + cbOffset += sizeof(UIntTarget); + + if (eField == ETF_DynamicGcStatics) + { + ASSERT((rareFlags & IsDynamicTypeWithGcStaticsFlag) != 0); + return cbOffset; + } + if ((rareFlags & IsDynamicTypeWithGcStaticsFlag) != 0) + cbOffset += sizeof(UIntTarget); + + if (eField == ETF_DynamicNonGcStatics) + { + ASSERT((rareFlags & IsDynamicTypeWithNonGcStaticsFlag) != 0); + return cbOffset; + } + if ((rareFlags & IsDynamicTypeWithNonGcStaticsFlag) != 0) + cbOffset += sizeof(UIntTarget); + + if (eField == ETF_DynamicThreadStaticOffset) + { + ASSERT((rareFlags & IsDynamicTypeWithThreadStaticsFlag) != 0); + return cbOffset; + } + if ((rareFlags & IsDynamicTypeWithThreadStaticsFlag) != 0) + cbOffset += sizeof(UInt32); + + ASSERT(!"Unknown EEType field type"); + return 0; +} +#endif // __eetype_inl__ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/gcinfo.h b/src/coreclr/src/nativeaot/Runtime/inc/gcinfo.h new file mode 100644 index 0000000000000..25f4d1c4458f5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/gcinfo.h @@ -0,0 +1,1588 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*****************************************************************************/ +#ifndef _GCINFO_H_ +#define _GCINFO_H_ +/*****************************************************************************/ + +// Keep definitions in this file in sync with Nutc\UTC\gcinfo.h + +#ifdef TARGET_ARM + +#define NUM_PRESERVED_REGS 9 + +enum RegMask +{ + RBM_R0 = 0x0001, + RBM_R1 = 0x0002, + RBM_R2 = 0x0004, + RBM_R3 = 0x0008, + RBM_R4 = 0x0010, // callee saved + RBM_R5 = 0x0020, // callee saved + RBM_R6 = 0x0040, // callee saved + RBM_R7 = 0x0080, // callee saved + RBM_R8 = 0x0100, // callee saved + RBM_R9 = 0x0200, // callee saved + RBM_R10 = 0x0400, // callee saved + RBM_R11 = 0x0800, // callee saved + RBM_R12 = 0x1000, + RBM_SP = 0x2000, + RBM_LR = 0x4000, // callee saved, but not valid to be alive across a call! 
+ RBM_PC = 0x8000, + RBM_RETVAL = RBM_R0, + RBM_CALLEE_SAVED_REGS = (RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_LR), + RBM_CALLEE_SAVED_REG_COUNT = 9, + // Special case: LR is callee saved, but may not appear as a live GC ref except + // in the leaf frame because calls will trash it. Therefore, we ALSO consider + // it a scratch register. + RBM_SCRATCH_REGS = (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R12|RBM_LR), + RBM_SCRATCH_REG_COUNT = 6, +}; + +enum RegNumber +{ + RN_R0 = 0, + RN_R1 = 1, + RN_R2 = 2, + RN_R3 = 3, + RN_R4 = 4, + RN_R5 = 5, + RN_R6 = 6, + RN_R7 = 7, + RN_R8 = 8, + RN_R9 = 9, + RN_R10 = 10, + RN_R11 = 11, + RN_R12 = 12, + RN_SP = 13, + RN_LR = 14, + RN_PC = 15, + + RN_NONE = 16, +}; + +enum CalleeSavedRegNum +{ + CSR_NUM_R4 = 0x00, + CSR_NUM_R5 = 0x01, + CSR_NUM_R6 = 0x02, + CSR_NUM_R7 = 0x03, + CSR_NUM_R8 = 0x04, + CSR_NUM_R9 = 0x05, + CSR_NUM_R10 = 0x06, + CSR_NUM_R11 = 0x07, + // NOTE: LR is omitted because it may not be live except as a 'scratch' reg +}; + +enum CalleeSavedRegMask +{ + CSR_MASK_NONE = 0x00, + CSR_MASK_R4 = 0x001, + CSR_MASK_R5 = 0x002, + CSR_MASK_R6 = 0x004, + CSR_MASK_R7 = 0x008, + CSR_MASK_R8 = 0x010, + CSR_MASK_R9 = 0x020, + CSR_MASK_R10 = 0x040, + CSR_MASK_R11 = 0x080, + CSR_MASK_LR = 0x100, + + CSR_MASK_ALL = 0x1ff, + CSR_MASK_HIGHEST = 0x100, +}; + +enum ScratchRegNum +{ + SR_NUM_R0 = 0x00, + SR_NUM_R1 = 0x01, + SR_NUM_R2 = 0x02, + SR_NUM_R3 = 0x03, + SR_NUM_R12 = 0x04, + SR_NUM_LR = 0x05, +}; + +enum ScratchRegMask +{ + SR_MASK_NONE = 0x00, + SR_MASK_R0 = 0x01, + SR_MASK_R1 = 0x02, + SR_MASK_R2 = 0x04, + SR_MASK_R3 = 0x08, + SR_MASK_R12 = 0x10, + SR_MASK_LR = 0x20, +}; + +#elif defined(TARGET_ARM64) + +enum RegMask +{ + RBM_NONE = 0, + + RBM_X0 = 0x00000001, + RBM_X1 = 0x00000002, + RBM_X2 = 0x00000004, + RBM_X3 = 0x00000008, + RBM_X4 = 0x00000010, + RBM_X5 = 0x00000020, + RBM_X6 = 0x00000040, + RBM_X7 = 0x00000080, + RBM_X8 = 0x00000100, // ARM64 ABI: indirect result register + RBM_X9 = 0x00000200, + RBM_X10 = 0x00000400, + RBM_X11 = 0x00000800, + RBM_X12 = 0x00001000, + RBM_X13 = 0x00002000, + RBM_X14 = 0x00004000, + RBM_X15 = 0x00008000, + + RBM_XIP0 = 0x00010000, // This one is occasionally used as a scratch register (but can be destroyed by branching or a call) + RBM_XIP1 = 0x00020000, // This one may be also used as a scratch register (but can be destroyed by branching or a call) + RBM_XPR = 0x00040000, + + RBM_X19 = 0x00080000, // RA_CALLEESAVE + RBM_X20 = 0x00100000, // RA_CALLEESAVE + RBM_X21 = 0x00200000, // RA_CALLEESAVE + RBM_X22 = 0x00400000, // RA_CALLEESAVE + RBM_X23 = 0x00800000, // RA_CALLEESAVE + RBM_X24 = 0x01000000, // RA_CALLEESAVE + RBM_X25 = 0x02000000, // RA_CALLEESAVE + RBM_X26 = 0x04000000, // RA_CALLEESAVE + RBM_X27 = 0x08000000, // RA_CALLEESAVE + RBM_X28 = 0x10000000, // RA_CALLEESAVE + + RBM_FP = 0x20000000, + RBM_LR = 0x40000000, + RBM_SP = 0x80000000, + + RBM_RETVAL = RBM_X8, + // Note: Callee saved regs: X19-X28; FP and LR are treated as callee-saved in unwinding code + RBM_CALLEE_SAVED_REG_COUNT = 12, + + // Scratch regs: X0-X15, XIP0, XIP1, LR + RBM_SCRATCH_REG_COUNT = 19, +}; + +#define NUM_PRESERVED_REGS RBM_CALLEE_SAVED_REG_COUNT + +// Number of the callee-saved registers stored in the fixed header +#define NUM_PRESERVED_REGS_LOW 9 +#define MASK_PRESERVED_REGS_LOW ((1 << NUM_PRESERVED_REGS_LOW) - 1) + +enum RegNumber +{ + RN_X0 = 0, + RN_X1 = 1, + RN_X2 = 2, + RN_X3 = 3, + RN_X4 = 4, + RN_X5 = 5, + RN_X6 = 6, + RN_X7 = 7, + RN_X8 = 8, // indirect result register + RN_X9 = 9, + RN_X10 = 10, + 
RN_X11 = 11, + RN_X12 = 12, + RN_X13 = 13, + RN_X14 = 14, + RN_X15 = 15, + + RN_XIP0 = 16, + RN_XIP1 = 17, + RN_XPR = 18, + + RN_X19 = 19, // RA_CALLEESAVE + RN_X20 = 20, // RA_CALLEESAVE + RN_X21 = 21, // RA_CALLEESAVE + RN_X22 = 22, // RA_CALLEESAVE + RN_X23 = 23, // RA_CALLEESAVE + RN_X24 = 24, // RA_CALLEESAVE + RN_X25 = 25, // RA_CALLEESAVE + RN_X26 = 26, // RA_CALLEESAVE + RN_X27 = 27, // RA_CALLEESAVE + RN_X28 = 28, // RA_CALLEESAVE + + RN_FP = 29, + RN_LR = 30, + RN_SP = 31, + + RN_NONE = 32, +}; + +enum CalleeSavedRegNum +{ + // NOTE: LR is omitted because it may not be live except as a 'scratch' reg + CSR_NUM_X19 = 1, + CSR_NUM_X20 = 2, + CSR_NUM_X21 = 3, + CSR_NUM_X22 = 4, + CSR_NUM_X23 = 5, + CSR_NUM_X24 = 6, + CSR_NUM_X25 = 7, + CSR_NUM_X26 = 8, + CSR_NUM_X27 = 9, + CSR_NUM_X28 = 10, + CSR_NUM_FP = 11, + CSR_NUM_NONE = 12, +}; + +enum CalleeSavedRegMask +{ + CSR_MASK_NONE = 0x00, + // LR is placed here to reduce the frequency of the long encoding + CSR_MASK_LR = 0x001, + CSR_MASK_X19 = 0x002, + CSR_MASK_X20 = 0x004, + CSR_MASK_X21 = 0x008, + CSR_MASK_X22 = 0x010, + CSR_MASK_X23 = 0x020, + CSR_MASK_X24 = 0x040, + CSR_MASK_X25 = 0x080, + CSR_MASK_X26 = 0x100, + CSR_MASK_X27 = 0x200, + CSR_MASK_X28 = 0x400, + CSR_MASK_FP = 0x800, + + CSR_MASK_ALL = 0xfff, + CSR_MASK_HIGHEST = 0x800, +}; + +enum ScratchRegNum +{ + SR_NUM_X0 = 0, + SR_NUM_X1 = 1, + SR_NUM_X2 = 2, + SR_NUM_X3 = 3, + SR_NUM_X4 = 4, + SR_NUM_X5 = 5, + SR_NUM_X6 = 6, + SR_NUM_X7 = 7, + SR_NUM_X8 = 8, + SR_NUM_X9 = 9, + SR_NUM_X10 = 10, + SR_NUM_X11 = 11, + SR_NUM_X12 = 12, + SR_NUM_X13 = 13, + SR_NUM_X14 = 14, + SR_NUM_X15 = 15, + + SR_NUM_XIP0 = 16, + SR_NUM_XIP1 = 17, + SR_NUM_LR = 18, + + SR_NUM_NONE = 19, +}; + +enum ScratchRegMask +{ + SR_MASK_NONE = 0x00, + SR_MASK_X0 = 0x01, + SR_MASK_X1 = 0x02, + SR_MASK_X2 = 0x04, + SR_MASK_X3 = 0x08, + SR_MASK_X4 = 0x10, + SR_MASK_X5 = 0x20, + SR_MASK_X6 = 0x40, + SR_MASK_X7 = 0x80, + SR_MASK_X8 = 0x100, + SR_MASK_X9 = 0x200, + SR_MASK_X10 = 0x400, + SR_MASK_X11 = 0x800, + SR_MASK_X12 = 0x1000, + SR_MASK_X13 = 0x2000, + SR_MASK_X14 = 0x4000, + SR_MASK_X15 = 0x8000, + + SR_MASK_XIP0 = 0x10000, + SR_MASK_XIP1 = 0x20000, + SR_MASK_LR = 0x40000, +}; + +#else // TARGET_ARM + +#ifdef TARGET_AMD64 +#define NUM_PRESERVED_REGS 8 +#else +#define NUM_PRESERVED_REGS 4 +#endif + +enum RegMask +{ + RBM_EAX = 0x0001, + RBM_ECX = 0x0002, + RBM_EDX = 0x0004, + RBM_EBX = 0x0008, // callee saved + RBM_ESP = 0x0010, + RBM_EBP = 0x0020, // callee saved + RBM_ESI = 0x0040, // callee saved + RBM_EDI = 0x0080, // callee saved + + RBM_R8 = 0x0100, + RBM_R9 = 0x0200, + RBM_R10 = 0x0400, + RBM_R11 = 0x0800, + RBM_R12 = 0x1000, // callee saved + RBM_R13 = 0x2000, // callee saved + RBM_R14 = 0x4000, // callee saved + RBM_R15 = 0x8000, // callee saved + + RBM_RETVAL = RBM_EAX, + +#ifdef TARGET_AMD64 + RBM_CALLEE_SAVED_REGS = (RBM_EDI|RBM_ESI|RBM_EBX|RBM_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15), + RBM_CALLEE_SAVED_REG_COUNT = 8, + RBM_SCRATCH_REGS = (RBM_EAX|RBM_ECX|RBM_EDX|RBM_R8|RBM_R9|RBM_R10|RBM_R11), + RBM_SCRATCH_REG_COUNT = 7, +#else + RBM_CALLEE_SAVED_REGS = (RBM_EDI|RBM_ESI|RBM_EBX|RBM_EBP), + RBM_CALLEE_SAVED_REG_COUNT = 4, + RBM_SCRATCH_REGS = (RBM_EAX|RBM_ECX|RBM_EDX), + RBM_SCRATCH_REG_COUNT = 3, +#endif // TARGET_AMD64 +}; + +enum RegNumber +{ + RN_EAX = 0, + RN_ECX = 1, + RN_EDX = 2, + RN_EBX = 3, + RN_ESP = 4, + RN_EBP = 5, + RN_ESI = 6, + RN_EDI = 7, + RN_R8 = 8, + RN_R9 = 9, + RN_R10 = 10, + RN_R11 = 11, + RN_R12 = 12, + RN_R13 = 13, + RN_R14 = 14, + RN_R15 = 15, + + RN_NONE = 16, +}; + +enum 
CalleeSavedRegNum +{ + CSR_NUM_RBX = 0x00, + CSR_NUM_RSI = 0x01, + CSR_NUM_RDI = 0x02, + CSR_NUM_RBP = 0x03, +#ifdef TARGET_AMD64 + CSR_NUM_R12 = 0x04, + CSR_NUM_R13 = 0x05, + CSR_NUM_R14 = 0x06, + CSR_NUM_R15 = 0x07, +#endif // TARGET_AMD64 +}; + +enum CalleeSavedRegMask +{ + CSR_MASK_NONE = 0x00, + CSR_MASK_RBX = 0x01, + CSR_MASK_RSI = 0x02, + CSR_MASK_RDI = 0x04, + CSR_MASK_RBP = 0x08, + CSR_MASK_R12 = 0x10, + CSR_MASK_R13 = 0x20, + CSR_MASK_R14 = 0x40, + CSR_MASK_R15 = 0x80, + +#ifdef TARGET_AMD64 + CSR_MASK_ALL = 0xFF, + CSR_MASK_HIGHEST = 0x80, +#else + CSR_MASK_ALL = 0x0F, + CSR_MASK_HIGHEST = 0x08, +#endif +}; + +enum ScratchRegNum +{ + SR_NUM_RAX = 0x00, + SR_NUM_RCX = 0x01, + SR_NUM_RDX = 0x02, +#ifdef TARGET_AMD64 + SR_NUM_R8 = 0x03, + SR_NUM_R9 = 0x04, + SR_NUM_R10 = 0x05, + SR_NUM_R11 = 0x06, +#endif // TARGET_AMD64 +}; + +enum ScratchRegMask +{ + SR_MASK_NONE = 0x00, + SR_MASK_RAX = 0x01, + SR_MASK_RCX = 0x02, + SR_MASK_RDX = 0x04, + SR_MASK_R8 = 0x08, + SR_MASK_R9 = 0x10, + SR_MASK_R10 = 0x20, + SR_MASK_R11 = 0x40, +}; + +#endif // TARGET_ARM + +struct GCInfoHeader +{ +private: + UInt16 prologSize : 6; // 0 [0:5] // @TODO: define an 'overflow' encoding for big prologs? + UInt16 hasFunclets : 1; // 0 [6] + UInt16 fixedEpilogSize : 6; // 0 [7] + 1 [0:4] '0' encoding implies that epilog size varies and is encoded for each epilog + UInt16 epilogCountSmall : 2; // 1 [5:6] '3' encoding implies the number of epilogs is encoded separately + UInt16 hasExtraData : 1; // 1 [7] 1: more data follows (dynamic alignment, GS cookie, common vars, etc.) + +#ifdef TARGET_ARM + UInt16 returnKind : 2; // 2 [0:1] one of: MethodReturnKind enum + UInt16 ebpFrame : 1; // 2 [2] on x64, this means "has frame pointer and it is RBP", on ARM R7 + UInt16 epilogAtEnd : 1; // 2 [3] + UInt16 hasFrameSize : 1; // 2 [4] 1: frame size is encoded below, 0: frame size is 0 + UInt16 calleeSavedRegMask : NUM_PRESERVED_REGS; // 2 [5:7] 3 [0:5] + UInt16 arm_areParmOrVfpRegsPushed:1; // 3 [6] 1: pushed param reg set (R0-R3) and pushed fp reg start and count are encoded below, 0: no pushed param or fp registers +#elif defined (TARGET_ARM64) + UInt16 returnKind : 2; // 2 [0:1] one of: MethodReturnKind enum + UInt16 ebpFrame : 1; // 2 [2] 1: has frame pointer and it is FP + UInt16 epilogAtEnd : 1; // 2 [3] + UInt16 hasFrameSize : 1; // 2 [4] 1: frame size is encoded below, 0: frame size is 0 + UInt16 arm64_longCsrMask : 1; // 2 [5] 1: high bits of calleeSavedRegMask are encoded below + UInt16 arm64_areParmOrVfpRegsPushed : 1; // 2 [6] 1: pushed param reg count (X0-X7) and pushed fp reg set (D8-D15) are encoded below, 0: no pushed param or fp registers + UInt16 arm64_calleeSavedRegMaskLow : NUM_PRESERVED_REGS_LOW; // 2 [7] 3 [0:7] +#else + UInt8 returnKind : 2; // 2 [0:1] one of: MethodReturnKind enum + UInt8 ebpFrame : 1; // 2 [2] on x64, this means "has frame pointer and it is RBP", on ARM R7 + UInt8 epilogAtEnd : 1; // 2 [3] +#ifdef TARGET_AMD64 + UInt8 hasFrameSize : 1; // 2 [4] 1: frame size is encoded below, 0: frame size is 0 + UInt8 x64_framePtrOffsetSmall : 2; // 2 [5:6] 00: framePtrOffset = 0x20 + // 01: framePtrOffset = 0x30 + // 10: framePtrOffset = 0x40 + // 11: a variable-length integer 'x64_frameOffset' follows. + UInt8 x64_hasSavedXmmRegs : 1; // 2 [7] any saved xmm registers? +#endif + // X86 X64 + UInt8 calleeSavedRegMask : NUM_PRESERVED_REGS; // 2 [4:7] 3 [0:7] + +#ifdef TARGET_X86 + UInt8 x86_argCountLow : 5; // 3 [0-4] expressed in pointer-sized units // @TODO: steal more bits here? 
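    // (The "N [a:b]" annotations on these bit-fields give the byte index and bit positions each
    //  field occupies within the 4-byte fixed header that EncodeHeader()/DecodeHeader() copy
    //  verbatim; the optional fields below the fixed header are encoded separately.)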
+ UInt8 x86_argCountIsLarge : 1; // 3 [5] if this bit is set, then the high 8 bits are encoded in x86_argCountHigh + UInt8 x86_hasStackChanges : 1; // 3 [6] x86-only, !ebpFrame-only, this method has pushes + // and pops in it, and a string follows this header + // which describes them + UInt8 hasFrameSize : 1; // 3 [7] 1: frame size is encoded below, 0: frame size is 0 +#endif +#endif + + // + // OPTIONAL FIELDS FOLLOW + // + // The following values are encoded with variable-length integers on disk, but are decoded into these + // fields in memory. + // + + // For ARM and ARM64 this field stores the offset of the callee-saved area relative to FP/SP + UInt32 frameSize; // expressed in pointer-sized units, only encoded if hasFrameSize==1 + // OPTIONAL: only encoded if returnKind = MRK_ReturnsToNative + UInt32 reversePinvokeFrameOffset; // expressed in pointer-sized units away from the frame pointer + +#ifdef TARGET_AMD64 + // OPTIONAL: only encoded if x64_framePtrOffsetSmall = 11 + // + // ENCODING NOTE: In the encoding, the variable-sized unsigned will be 7 less than the total number + // of 16-byte units that make up the frame pointer offset. + // + // In memory, this value will always be set and will always be the total number of 16-byte units that make + // up the frame pointer offset. + UInt8 x64_framePtrOffset; // expressed in 16-byte unit + + // OPTIONAL: only encoded using a variable-sized unsigned if x64_hasSavedXmmRegs is set. + // + // An additional optimization is possible because registers xmm0 .. xmm5 should never be saved, + // so they are not encoded in the variable-sized unsigned - instead the mask is shifted right 6 bits + // for encoding. Thus, any subset of registers xmm6 .. xmm12 can be represented using one byte + // - this covers the most frequent cases. + // + // The shift applies to decoding/encoding only though - the actual header field below uses the + // straightforward mapping where bit 0 corresponds to xmm0, bit 1 corresponds to xmm1 and so on. + // + UInt16 x64_savedXmmRegMask; // which xmm regs were saved +#elif defined(TARGET_X86) + // OPTIONAL: only encoded if x86_argCountIsLarge = 1 + // NOTE: because we are using pointer-sized units, only 14 bits are required to represent the entire range + // that can be expressed by a 'ret NNNN' instruction. Therefore, with 6 in the 'low' field and 8 in the + // 'high' field, we are not losing any range here. (Although the need for that full range is debatable.) 
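The note above describes splitting the x86 return-pop count across 5 bits in the fixed header plus an optional extra byte. A standalone round-trip of that split (stand-in struct, 4-byte pointer size assumed):

#include <cstdint>
#include <cassert>

struct ArgCountEncoding
{
    uint8_t low5;      // stored inline in the fixed header (x86_argCountLow)
    bool    isLarge;   // set when the count does not fit in 5 bits (x86_argCountIsLarge)
    uint8_t high8;     // optional extra byte (x86_argCountHigh)
};

static ArgCountEncoding EncodeArgCount(uint32_t popSizeInBytes, uint32_t pointerSize = 4)
{
    uint32_t argCount = popSizeInBytes / pointerSize;
    ArgCountEncoding e = {};
    e.low5 = argCount & 0x1F;
    if (argCount != e.low5)
    {
        e.isLarge = true;
        e.high8 = (uint8_t)(argCount >> 5);
    }
    return e;
}

static uint32_t DecodeArgCount(const ArgCountEncoding& e, uint32_t pointerSize = 4)
{
    uint32_t argCount = e.isLarge ? (((uint32_t)e.high8 << 5) | e.low5) : e.low5;
    return argCount * pointerSize;
}

int main()
{
    assert(DecodeArgCount(EncodeArgCount(0x90)) == 0x90);   // 36 slots: needs the extra byte
    assert(DecodeArgCount(EncodeArgCount(0x40)) == 0x40);   // 16 slots: fits in 5 bits
    return 0;
}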
+ UInt8 x86_argCountHigh; +#elif defined(TARGET_ARM) + // OPTIONAL: only encoded if arm_areParmOrVfpRegsPushed = 1 + UInt8 arm_parmRegsPushedSet; + UInt8 arm_vfpRegFirstPushed; + UInt8 arm_vfpRegPushedCount; +#elif defined(TARGET_ARM64) + // OPTIONAL: high bits of calleeSavedRegMask are encoded only if arm64_longCsrMask = 1; low bits equal to arm64_calleeSavedRegMaskLow + UInt16 calleeSavedRegMask; + + // OPTIONAL: only encoded if arm64_areParmOrVfpRegsPushed = 1 + UInt8 arm64_parmRegsPushedCount; // how many of X0-X7 registers are saved + UInt8 arm64_vfpRegsPushedMask; // which of D8-D15 registers are saved +#endif + + // + // OPTIONAL: only encoded if hasExtraData = 1 + union + { + struct + { +#if defined(TARGET_ARM64) + UInt8 FPLRAreOnTop : 1; // [0] 1: FP and LR are saved on top of locals, not at the bottom (see MdmSaveFPAndLRAtTopOfLocalsArea) + UInt8 reg1ReturnKind : 2; // [1:2] One of MRK_Returns{Scalar|Object|Byref} constants describing value returned in x1 if any + UInt8 hasGSCookie : 1; // [3] 1: frame uses GS cookie + UInt8 hasCommonVars : 1; // [4] 1: method has a list of "common vars" + // as an optimization for methods with many call sites and variables + UInt8 : 3; // [5:7] unused bits +#else + UInt8 logStackAlignment : 4; // [0:3] binary logarithm of frame alignment (3..15) or 0 + UInt8 hasGSCookie : 1; // [4] 1: frame uses GS cookie + UInt8 hasCommonVars : 1; // [5] 1: method has a list of "common vars" + // as an optimization for methods with many call sites and variables + UInt8 : 2; // [6:7] unused bits +#endif +#pragma warning(suppress:4201) // nameless struct + }; + UInt8 extraDataHeader; + }; + + // OPTIONAL: only encoded if logStackAlignment != 0 + UInt8 paramPointerReg; + + // OPTIONAL: only encoded if epilogCountSmall = 3 + UInt16 epilogCount; + + // OPTIONAL: only encoded if gsCookie = 1 + UInt32 gsCookieOffset; // expressed in pointer-sized units away from the frame pointer + + // + // OPTIONAL: only encoded if hasFunclets = 1 + // {numFunclets} // encoded as variable-length unsigned + // {start-funclet0} // offset from start of previous funclet, encoded as variable-length unsigned + // {start-funclet1} // + // {start-funclet2} + // ... + // {sizeof-funclet(N-1)} // numFunclets == N (i.e. there are N+1 sizes here) + // ----------------- + // {GCInfoHeader-funclet0} // encoded as normal, must not have 'hasFunclets' set. + // {GCInfoHeader-funclet1} + // ... + // {GCInfoHeader-funclet(N-1)} + + // WARNING: + // WARNING: Do not add fields to the file-format after the funclet header encodings -- these are decoded + // WARNING: recursively and 'in-place' when looking for the info associated with a funclet. Therefore, + // WARNING: in that case, we cannot easily continue to decode things associated with the main body + // WARNING: GCInfoHeader once we start this recursive decode. + // WARNING: + + // ------------------------------------------------------------------------------------------------------- + // END of file-encoding-related-fields + // ------------------------------------------------------------------------------------------------------- + + // The following fields are not encoded in the file format, they are just used as convenience placeholders + // for decode state. 
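To make the funclet table layout described above concrete, here is a standalone sketch of the lookup DecodeHeader() performs further down: funclet start offsets are delta-encoded against the previous funclet, and a code offset maps either to the main body (index -1) or to the funclet whose range contains it. Plain integers stand in for the variable-length-encoded stream.

#include <cstdint>
#include <cassert>
#include <vector>

// Returns -1 for the main body, otherwise the index of the containing funclet,
// and reports that funclet's absolute start offset through pFuncletStart.
static int FindFunclet(const std::vector<uint32_t>& startDeltas, uint32_t codeOffset,
                       uint32_t* pFuncletStart)
{
    int idx = -1;
    uint32_t prevStart = 0;
    for (size_t i = 0; i < startDeltas.size(); i++)
    {
        uint32_t thisStart = prevStart + startDeltas[i];
        if (codeOffset < thisStart)
            break;
        idx = (int)i;
        prevStart = thisStart;
    }
    *pFuncletStart = (idx >= 0) ? prevStart : 0;
    return idx;
}

int main()
{
    // Funclets start at 0x100, 0x140 and 0x200 (deltas 0x100, 0x40, 0xC0).
    std::vector<uint32_t> deltas = { 0x100, 0x40, 0xC0 };
    uint32_t start;
    assert(FindFunclet(deltas, 0x050, &start) == -1);                    // main body
    assert(FindFunclet(deltas, 0x150, &start) ==  1 && start == 0x140);  // second funclet
    assert(FindFunclet(deltas, 0x250, &start) ==  2 && start == 0x200);  // last funclet
    return 0;
}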
+ UInt32 funcletOffset; // non-zero indicates that this GCInfoHeader is for a funclet + +public: + // + // CONSTANTS / STATIC STUFF + // + + enum MethodReturnKind + { + MRK_ReturnsScalar = 0, + MRK_ReturnsObject = 1, + MRK_ReturnsByref = 2, + MRK_ReturnsToNative = 3, + +#if defined(TARGET_ARM64) + // Cases for structs returned in two registers. + // Naming scheme: MRK_reg0Kind_reg1Kind. + // Encoding scheme: . + // We do not distinguish returning a scalar in reg1 and no return value in reg1, + // which means we can use MRK_ReturnsObject for MRK_Obj_Scalar, etc. + MRK_Scalar_Obj = (MRK_ReturnsObject << 2) | MRK_ReturnsScalar, + MRK_Obj_Obj = (MRK_ReturnsObject << 2) | MRK_ReturnsObject, + MRK_Byref_Obj = (MRK_ReturnsObject << 2) | MRK_ReturnsByref, + MRK_Scalar_Byref = (MRK_ReturnsByref << 2) | MRK_ReturnsScalar, + MRK_Obj_Byref = (MRK_ReturnsByref << 2) | MRK_ReturnsObject, + MRK_Byref_Byref = (MRK_ReturnsByref << 2) | MRK_ReturnsByref, + + MRK_LastValid = MRK_Byref_Byref, + // Illegal or uninitialized value. Never written to the image. + MRK_Unknown = 0xff, +#else + MRK_LastValid = MRK_ReturnsToNative, + // Illegal or uninitialized value. Never written to the image. + MRK_Unknown = 4, +#endif + }; + + enum EncodingConstants + { + EC_SizeOfFixedHeader = 4, + EC_MaxFrameByteSize = 10*1024*1024, + EC_MaxReversePInvokeFrameByteOffset = 10*1024*1024, + EC_MaxX64FramePtrByteOffset = UInt16_MAX * 0x10, + EC_MaxEpilogCountSmall = 3, + EC_MaxEpilogCount = 64*1024 - 1, + }; + + // + // MEMBER FUNCTIONS + // + + void Init() + { + memset(this, 0, sizeof(GCInfoHeader)); + } + + // + // SETTERS + // + + void SetPrologSize(UInt32 sizeInBytes) + { +#if defined (TARGET_ARM64) + // For arm64 we encode multiples of 4, rather than raw bytes, since instructions are all same size. + ASSERT((sizeInBytes & 3) == 0); + prologSize = sizeInBytes >> 2; + ASSERT(prologSize == sizeInBytes >> 2); +#else + prologSize = sizeInBytes; + ASSERT(prologSize == sizeInBytes); +#endif + } + + void SetHasFunclets(bool fHasFunclets) + { + hasFunclets = fHasFunclets ? 1 : 0; + } + + void PokeFixedEpilogSize(UInt32 sizeInBytes) + { +#if defined (TARGET_ARM64) + // For arm64 we encode multiples of 4, rather than raw bytes, since instructions are all same size. + ASSERT((sizeInBytes & 3) == 0); + fixedEpilogSize = sizeInBytes >> 2; + ASSERT(fixedEpilogSize == sizeInBytes >> 2); +#else + fixedEpilogSize = sizeInBytes; + ASSERT(fixedEpilogSize == sizeInBytes); +#endif + } + + void SetFixedEpilogSize(UInt32 sizeInBytes, bool varyingSizes) + { + if (varyingSizes) + fixedEpilogSize = 0; + else + { + ASSERT(sizeInBytes != 0); +#if defined (TARGET_ARM64) + // For arm64 we encode multiples of 4, rather than raw bytes, since instructions are all same size. + ASSERT((sizeInBytes & 3) == 0); + fixedEpilogSize = sizeInBytes >> 2; + ASSERT(fixedEpilogSize == sizeInBytes >> 2); +#else + fixedEpilogSize = sizeInBytes; + ASSERT(fixedEpilogSize == sizeInBytes); +#endif + } + } + + void SetEpilogCount(UInt32 count, bool isAtEnd) + { + epilogCount = ToUInt16(count); + epilogAtEnd = isAtEnd ? 1 : 0; + + ASSERT(epilogCount == count); + ASSERT((count == 1) || !isAtEnd); + epilogCountSmall = count < EC_MaxEpilogCountSmall ? 
count : EC_MaxEpilogCountSmall; + } + +#if !defined(TARGET_ARM64) + void SetReturnKind(MethodReturnKind kind) + { + ASSERT(kind <= MRK_ReturnsToNative); // not enough bits to encode 'unknown' + returnKind = kind; + } + + void SetDynamicAlignment(UInt8 logByteAlignment) + { +#ifdef TARGET_X86 + ASSERT(logByteAlignment >= 3); // 4 byte aligned frames +#else + ASSERT(logByteAlignment >= 4); // 8 byte aligned frames +#endif + + hasExtraData = 1; + logStackAlignment = logByteAlignment; + ASSERT(logStackAlignment == logByteAlignment); + paramPointerReg = RN_NONE; + } +#endif // !defined(TARGET_ARM64) + +#if defined(TARGET_ARM64) + void SetFPLROnTop(void) + { + hasExtraData = 1; + FPLRAreOnTop = 1; + } +#endif + + void SetGSCookieOffset(UInt32 offsetInBytes) + { + ASSERT(offsetInBytes != 0); + ASSERT(0 == (offsetInBytes % POINTER_SIZE)); + hasExtraData = 1; + hasGSCookie = 1; + gsCookieOffset = offsetInBytes / POINTER_SIZE; + } + + void SetHasCommonVars() + { + hasExtraData = 1; + hasCommonVars = 1; + } + + void SetParamPointer(RegNumber regNum, UInt32 offsetInBytes, bool isOffsetFromSP = false) + { + UNREFERENCED_PARAMETER(offsetInBytes); + UNREFERENCED_PARAMETER(isOffsetFromSP); + ASSERT(HasDynamicAlignment()); // only expected for dynamic aligned frames + ASSERT(offsetInBytes==0); // not yet supported + + paramPointerReg = (UInt8)regNum; + } + + void SetFramePointer(RegNumber regNum, UInt32 offsetInBytes, bool isOffsetFromSP = false) + { + UNREFERENCED_PARAMETER(offsetInBytes); + UNREFERENCED_PARAMETER(isOffsetFromSP); + + if (regNum == RN_NONE) + { + ebpFrame = 0; + } + else + { +#ifdef TARGET_ARM + ASSERT(regNum == RN_R7); +#elif defined(TARGET_AMD64) || defined(TARGET_X86) + ASSERT(regNum == RN_EBP); +#elif defined(TARGET_ARM64) + ASSERT(regNum == RN_FP); +#else + ASSERT(!"NYI"); +#endif + ebpFrame = 1; + } + ASSERT(offsetInBytes == 0 || isOffsetFromSP); + +#ifdef TARGET_AMD64 + if (isOffsetFromSP) + offsetInBytes += SKEW_FOR_OFFSET_FROM_SP; + + ASSERT((offsetInBytes % 0x10) == 0); + UInt32 offsetInSlots = offsetInBytes / 0x10; + if (offsetInSlots >= 3 && offsetInSlots <= 3 + 2) + { + x64_framePtrOffsetSmall = offsetInSlots - 3; + } + else + { + x64_framePtrOffsetSmall = 3; + } + x64_framePtrOffset = (UInt8)offsetInSlots; + ASSERT(x64_framePtrOffset == offsetInSlots); +#else + ASSERT(offsetInBytes == 0 && !isOffsetFromSP); +#endif // TARGET_AMD64 + } + + void SetFrameSize(UInt32 frameSizeInBytes) + { + ASSERT(0 == (frameSizeInBytes % POINTER_SIZE)); + frameSize = (frameSizeInBytes / POINTER_SIZE); + ASSERT(frameSize == (frameSizeInBytes / POINTER_SIZE)); + if (frameSize != 0) + { + hasFrameSize = 1; + } + } + + void SetSavedRegs(CalleeSavedRegMask regMask) + { + calleeSavedRegMask = (UInt16)regMask; + } + + void SetRegSaved(CalleeSavedRegMask regMask) + { + calleeSavedRegMask |= regMask; + } + + void SetReversePinvokeFrameOffset(int offsetInBytes) + { + ASSERT(HasFramePointer()); + ASSERT((offsetInBytes % POINTER_SIZE) == 0); + ASSERT(GetReturnKind() == MRK_ReturnsToNative); + +#if defined(TARGET_ARM) || defined(TARGET_AMD64) || defined(TARGET_ARM64) + // The offset can be either positive or negative on ARM and x64. + bool isNeg = (offsetInBytes < 0); + UInt32 uOffsetInBytes = isNeg ? -offsetInBytes : offsetInBytes; + UInt32 uEncodedVal = ((uOffsetInBytes / POINTER_SIZE) << 1) | (isNeg ? 
1 : 0); + reversePinvokeFrameOffset = uEncodedVal; + ASSERT(reversePinvokeFrameOffset == uEncodedVal); +#elif defined (TARGET_X86) + // Use a positive number because it encodes better and + // the offset is always negative on x86. + ASSERT(offsetInBytes < 0); + reversePinvokeFrameOffset = (-offsetInBytes / POINTER_SIZE); + ASSERT(reversePinvokeFrameOffset == (UInt32)(-offsetInBytes / POINTER_SIZE)); +#else + ASSERT(!"NYI"); +#endif + } + +#ifdef TARGET_X86 + void SetReturnPopSize(UInt32 popSizeInBytes) + { + ASSERT(0 == (popSizeInBytes % POINTER_SIZE)); + ASSERT(GetReturnPopSize() == 0 || GetReturnPopSize() == (int)popSizeInBytes); + + UInt32 argCount = popSizeInBytes / POINTER_SIZE; + x86_argCountLow = argCount & 0x1F; + if (argCount != x86_argCountLow) + { + x86_argCountIsLarge = 1; + x86_argCountHigh = (UInt8)(argCount >> 5); + } + } + + void SetHasStackChanges() + { + x86_hasStackChanges = 1; + } +#endif // TARGET_X86 + +#ifdef TARGET_ARM + void SetParmRegsPushed(ScratchRegMask pushedParmRegs) + { + // should be a subset of {RO-R3} + ASSERT((pushedParmRegs & ~(SR_MASK_R0|SR_MASK_R1|SR_MASK_R2|SR_MASK_R3)) == 0); + arm_areParmOrVfpRegsPushed = pushedParmRegs != 0 || arm_vfpRegPushedCount != 0; + arm_parmRegsPushedSet = (UInt8)pushedParmRegs; + } + + void SetVfpRegsPushed(UInt8 vfpRegFirstPushed, UInt8 vfpRegPushedCount) + { + // mrt100.dll really only supports pushing a subinterval of d8-d15 + // these are the preserved floating point registers according to the ABI spec + ASSERT(8 <= vfpRegFirstPushed && vfpRegFirstPushed + vfpRegPushedCount <= 16 || vfpRegPushedCount == 0); + arm_vfpRegFirstPushed = vfpRegFirstPushed; + arm_vfpRegPushedCount = vfpRegPushedCount; + arm_areParmOrVfpRegsPushed = arm_parmRegsPushedSet != 0 || vfpRegPushedCount != 0; + } +#elif defined(TARGET_ARM64) + void SetParmRegsPushedCount(UInt8 parmRegsPushedCount) + { + // pushed parameter registers are a subset of {R0-R7} + ASSERT(parmRegsPushedCount <= 8); + arm64_parmRegsPushedCount = parmRegsPushedCount; + arm64_areParmOrVfpRegsPushed = (arm64_parmRegsPushedCount != 0) || (arm64_vfpRegsPushedMask != 0); + } + + void SetVfpRegsPushed(UInt8 vfpRegsPushedMask) + { + arm64_vfpRegsPushedMask = vfpRegsPushedMask; + arm64_areParmOrVfpRegsPushed = (arm64_parmRegsPushedCount != 0) || (arm64_vfpRegsPushedMask != 0); + } +#elif defined(TARGET_AMD64) + void SetSavedXmmRegs(UInt32 savedXmmRegMask) + { + // any subset of xmm6-xmm15 may be saved, but no registers in xmm0-xmm5 should be present + ASSERT((savedXmmRegMask & 0xffff003f) == 0); + x64_hasSavedXmmRegs = savedXmmRegMask != 0; + x64_savedXmmRegMask = (UInt16)savedXmmRegMask; + } +#endif + + void SetFuncletOffset(UInt32 offset) + { + funcletOffset = offset; + } + + // + // GETTERS + // + UInt32 GetPrologSize() + { +#if defined (TARGET_ARM64) + return prologSize << 2; +#else + return prologSize; +#endif + } + + bool HasFunclets() + { + return (hasFunclets != 0); + } + + bool HasVaryingEpilogSizes() + { + return fixedEpilogSize == 0; + } + + UInt32 PeekFixedEpilogSize() + { +#if defined (TARGET_ARM64) + return fixedEpilogSize << 2; +#else + return fixedEpilogSize; +#endif + } + + UInt32 GetFixedEpilogSize() + { + ASSERT(!HasVaryingEpilogSizes()); +#if defined (TARGET_ARM64) + return fixedEpilogSize << 2; +#else + return fixedEpilogSize; +#endif + } + + UInt32 GetEpilogCount() + { + return epilogCount; + } + + bool IsEpilogAtEnd() + { + return (epilogAtEnd != 0); + } + + MethodReturnKind GetReturnKind() + { +#if defined(TARGET_ARM64) + return 
(MethodReturnKind)((reg1ReturnKind << 2) | returnKind); +#else + return (MethodReturnKind)returnKind; +#endif + } + + bool ReturnsToNative() + { + return (GetReturnKind() == MRK_ReturnsToNative); + } + + bool HasFramePointer() const + { + return !!ebpFrame; + } + + bool IsFunclet() + { + return funcletOffset != 0; + } + + UInt32 GetFuncletOffset() + { + return funcletOffset; + } + + int GetPreservedRegsSaveSize() const // returned in bytes + { + UInt32 count = 0; + UInt32 mask = calleeSavedRegMask; + while (mask != 0) + { + count += mask & 1; + mask >>= 1; + } + + return count * POINTER_SIZE; + } + + int GetParamPointerReg() + { + return paramPointerReg; + } + + bool HasDynamicAlignment() + { +#if defined(TARGET_ARM64) + return false; +#else + return !!logStackAlignment; +#endif + } + + UInt32 GetDynamicAlignment() + { +#if defined(TARGET_ARM64) + ASSERT(!"Not supported"); + return 1; +#else + return 1 << logStackAlignment; +#endif + } + + bool HasGSCookie() + { + return hasGSCookie; + } + +#if defined(TARGET_ARM64) + bool AreFPLROnTop() const + { + return FPLRAreOnTop; + } +#endif + + UInt32 GetGSCookieOffset() + { + ASSERT(hasGSCookie); + return gsCookieOffset * POINTER_SIZE; + } + + bool HasCommonVars() const + { + return hasCommonVars; + } + +#ifdef TARGET_AMD64 + static const UInt32 SKEW_FOR_OFFSET_FROM_SP = 0x10; + + int GetFramePointerOffset() const // returned in bytes + { + // traditional frames where FP points to the pushed FP have fp offset == 0 + if (x64_framePtrOffset == 0) + return 0; + + // otherwise it's an x64 style frame where the fp offset is measured from the sp + // at the end of the prolog + int offsetFromSP = GetFramePointerOffsetFromSP(); + + int preservedRegsSaveSize = GetPreservedRegsSaveSize(); + + // we when called from the binder, rbp isn't set to be a preserved reg, + // when called from the runtime, it is - compensate for this inconsistency + if (IsRegSaved(CSR_MASK_RBP)) + preservedRegsSaveSize -= POINTER_SIZE; + + return offsetFromSP - preservedRegsSaveSize - GetFrameSize(); + } + + bool IsFramePointerOffsetFromSP() const + { + return x64_framePtrOffset != 0; + } + + int GetFramePointerOffsetFromSP() const + { + ASSERT(IsFramePointerOffsetFromSP()); + int offsetFromSP; + offsetFromSP = x64_framePtrOffset * 0x10; + ASSERT(offsetFromSP >= SKEW_FOR_OFFSET_FROM_SP); + offsetFromSP -= SKEW_FOR_OFFSET_FROM_SP; + + return offsetFromSP; + } + + int GetFramePointerReg() + { + return RN_EBP; + } + + bool HasSavedXmmRegs() + { + return x64_hasSavedXmmRegs != 0; + } + + UInt16 GetSavedXmmRegMask() + { + ASSERT(x64_hasSavedXmmRegs); + return x64_savedXmmRegMask; + } +#elif defined(TARGET_X86) + int GetReturnPopSize() // returned in bytes + { + if (!x86_argCountIsLarge) + { + return x86_argCountLow * POINTER_SIZE; + } + return ((x86_argCountHigh << 5) | x86_argCountLow) * POINTER_SIZE; + } + + bool HasStackChanges() + { + return !!x86_hasStackChanges; + } +#endif + + int GetFrameSize() const + { + return frameSize * POINTER_SIZE; + } + + + int GetReversePinvokeFrameOffset() + { +#if defined(TARGET_ARM) || defined(TARGET_AMD64) || defined(TARGET_ARM64) + // The offset can be either positive or negative on ARM. + Int32 offsetInBytes; + UInt32 uEncodedVal = reversePinvokeFrameOffset; + bool isNeg = ((uEncodedVal & 1) == 1); + offsetInBytes = (uEncodedVal >> 1) * POINTER_SIZE; + offsetInBytes = isNeg ? 
-offsetInBytes : offsetInBytes; + return offsetInBytes; +#elif defined(TARGET_X86) + // it's always at "EBP - something", so we encode it as a positive + // number and then apply the negative here. + int unsignedOffset = reversePinvokeFrameOffset * POINTER_SIZE; + return -unsignedOffset; +#else + ASSERT(!"NYI"); +#endif + } + + CalleeSavedRegMask GetSavedRegs() + { + return (CalleeSavedRegMask) calleeSavedRegMask; + } + + bool IsRegSaved(CalleeSavedRegMask reg) const + { + return (0 != (calleeSavedRegMask & reg)); + } + +#ifdef TARGET_ARM + bool AreParmRegsPushed() + { + return arm_parmRegsPushedSet != 0; + } + + UInt16 ParmRegsPushedCount() + { + UInt8 set = arm_parmRegsPushedSet; + UInt8 count = 0; + while (set != 0) + { + count += set & 1; + set >>= 1; + } + return count; + } + + UInt8 GetVfpRegFirstPushed() + { + return arm_vfpRegFirstPushed; + } + + UInt8 GetVfpRegPushedCount() + { + return arm_vfpRegPushedCount; + } +#elif defined(TARGET_ARM64) + UInt8 ParmRegsPushedCount() + { + return arm64_parmRegsPushedCount; + } + + UInt8 GetVfpRegsPushedMask() + { + return arm64_vfpRegsPushedMask; + } +#endif + + // + // ENCODING HELPERS + // +#ifndef DACCESS_COMPILE + size_t EncodeHeader(UInt8 * & pDest) + { +#ifdef _DEBUG + UInt8 * pStart = pDest; +#endif // _DEBUG + +#if defined(TARGET_ARM64) + UInt8 calleeSavedRegMaskHigh = calleeSavedRegMask >> NUM_PRESERVED_REGS_LOW; + arm64_calleeSavedRegMaskLow = calleeSavedRegMask & MASK_PRESERVED_REGS_LOW; + if (calleeSavedRegMaskHigh) + { + arm64_longCsrMask = 1; + } +#endif + + size_t size = EC_SizeOfFixedHeader; + if (pDest) + { + memcpy(pDest, this, EC_SizeOfFixedHeader); + pDest += EC_SizeOfFixedHeader; + } + + if (hasFrameSize) + size += WriteUnsigned(pDest, frameSize); + + if (returnKind == MRK_ReturnsToNative) + size += WriteUnsigned(pDest, reversePinvokeFrameOffset); + +#ifdef TARGET_AMD64 + if (x64_framePtrOffsetSmall == 0x3) + size += WriteUnsigned(pDest, x64_framePtrOffset); + + if (x64_hasSavedXmmRegs) + { + ASSERT((x64_savedXmmRegMask & 0x3f) == 0); + UInt32 encodedValue = x64_savedXmmRegMask >> 6; + size += WriteUnsigned(pDest, encodedValue); + } +#elif defined(TARGET_X86) + if (x86_argCountIsLarge) + { + size += 1; + if (pDest) + *pDest++ = x86_argCountHigh; + } + ASSERT(!x86_hasStackChanges || !"NYI -- stack changes for ESP frames"); +#elif defined(TARGET_ARM) + if (arm_areParmOrVfpRegsPushed) + { + // we encode a bit field where the low 4 bits represent the pushed parameter register + // set, the next 8 bits are the number of pushed floating point registers, and the highest + // bits are the first pushed floating point register plus 1. + // The 0 encoding means the first floating point register is 8 as this is the most frequent. 
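A standalone round-trip of the ARM bit layout the comment above describes: bits [3:0] carry the pushed parameter-register set, bits [11:4] the pushed VFP register count, and the bits above that the first pushed VFP register plus one (0 meaning d8, the common case):

#include <cstdint>
#include <cassert>

static uint32_t EncodeArmPushInfo(uint8_t parmRegsPushedSet, uint8_t vfpFirst, uint8_t vfpCount)
{
    uint32_t v = parmRegsPushedSet | ((uint32_t)vfpCount << 4);
    if (vfpFirst != 8)                       // d8 is the common case and gets the 0 encoding
        v |= (uint32_t)(vfpFirst + 1) << 12;
    return v;
}

static void DecodeArmPushInfo(uint32_t v, uint8_t* parmSet, uint8_t* vfpFirst, uint8_t* vfpCount)
{
    *parmSet  = v & 0x0f;
    *vfpCount = (uint8_t)(v >> 4);
    uint32_t first = v >> 12;
    *vfpFirst = (first == 0) ? 8 : (uint8_t)(first - 1);
}

int main()
{
    uint8_t set, first, count;
    DecodeArmPushInfo(EncodeArmPushInfo(0x5 /* r0, r2 */, 10, 4), &set, &first, &count);
    assert(set == 0x5 && first == 10 && count == 4);
    DecodeArmPushInfo(EncodeArmPushInfo(0x3 /* r0, r1 */, 8, 2), &set, &first, &count);
    assert(set == 0x3 && first == 8 && count == 2);
    return 0;
}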
+ UInt32 encodedValue = arm_parmRegsPushedSet | (arm_vfpRegPushedCount << 4); + // usually, the first pushed floating point register is d8 + if (arm_vfpRegFirstPushed != 8) + encodedValue |= (arm_vfpRegFirstPushed+1) << (8+4); + + size += WriteUnsigned(pDest, encodedValue); + } +#elif defined(TARGET_ARM64) + if (calleeSavedRegMaskHigh) + { + size += 1; + if (pDest) + *pDest++ = calleeSavedRegMaskHigh; + } + + if (arm64_areParmOrVfpRegsPushed) + { + // At present arm64_parmRegsPushedCount is non-zero only for variadic functions, so place this field higher + UInt32 encodedValue = arm64_vfpRegsPushedMask | (arm64_parmRegsPushedCount << 8); + size += WriteUnsigned(pDest, encodedValue); + } +#endif + + // encode dynamic alignment and GS cookie information + if (hasExtraData) + { + size += WriteUnsigned(pDest, extraDataHeader); + } + if (HasDynamicAlignment()) + { + size += WriteUnsigned(pDest, paramPointerReg); + } + if (hasGSCookie) + { + size += WriteUnsigned(pDest, gsCookieOffset); + } + + if (epilogCountSmall == EC_MaxEpilogCountSmall) + { + size += WriteUnsigned(pDest, epilogCount); + } + + // WARNING: + // WARNING: Do not add fields to the file-format after the funclet header encodings -- these are + // WARNING: decoded recursively and 'in-place' when looking for the info associated with a funclet. + // WARNING: Therefore, in that case, we cannot easily continue to decode things associated with the + // WARNING: main body GCInfoHeader once we start this recursive decode. + // WARNING: + size += EncodeFuncletInfo(pDest); + +#ifdef _DEBUG + ASSERT(!pDest || (size == (size_t)(pDest - pStart))); +#endif // _DEBUG + + return size; + } + + size_t WriteUnsigned(UInt8 * & pDest, UInt32 value) + { + size_t size = (size_t)VarInt::WriteUnsigned(pDest, value); + pDest = pDest ? (pDest + size) : pDest; + return size; + } +#endif // DACCESS_COMPILE + + UInt16 ToUInt16(UInt32 val) + { + UInt16 result = (UInt16)val; + ASSERT(val == result); + return result; + } + + UInt8 ToUInt8(UInt32 val) + { + UInt8 result = (UInt8)val; + ASSERT(val == result); + return result; + } + + // + // DECODING HELPERS + // + // Returns a pointer to the 'stack change string' on x86. + PTR_UInt8 DecodeHeader(UInt32 methodOffset, PTR_UInt8 pbHeaderEncoding, size_t* pcbHeader) + { + PTR_UInt8 pbStackChangeString = NULL; + + TADDR pbTemp = PTR_TO_TADDR(pbHeaderEncoding); + memcpy(this, PTR_READ(pbTemp, EC_SizeOfFixedHeader), EC_SizeOfFixedHeader); + + PTR_UInt8 pbDecode = pbHeaderEncoding + EC_SizeOfFixedHeader; + frameSize = hasFrameSize + ? VarInt::ReadUnsigned(pbDecode) + : 0; + + reversePinvokeFrameOffset = (returnKind == MRK_ReturnsToNative) + ? VarInt::ReadUnsigned(pbDecode) + : 0; + +#ifdef TARGET_AMD64 + x64_framePtrOffset = (x64_framePtrOffsetSmall == 0x3) + ? 
ToUInt8(VarInt::ReadUnsigned(pbDecode)) + : x64_framePtrOffsetSmall + 3; + + + x64_savedXmmRegMask = 0; + if (x64_hasSavedXmmRegs) + { + UInt32 encodedValue = VarInt::ReadUnsigned(pbDecode); + ASSERT((encodedValue & ~0x3ff) == 0); + x64_savedXmmRegMask = ToUInt16(encodedValue << 6); + } + +#elif defined(TARGET_X86) + if (x86_argCountIsLarge) + x86_argCountHigh = *pbDecode++; + else + x86_argCountHigh = 0; + + if (x86_hasStackChanges) + { + pbStackChangeString = pbDecode; + + bool last = false; + while (!last) + { + UInt8 b = *pbDecode++; + // 00111111 {delta} forwarder + // 00dddddd push 1, dddddd = delta + // nnnldddd pop nnn-1, l = last, dddd = delta (nnn=0 and nnn=1 are disallowed) + if (b == 0x3F) + { + // 00111111 {delta} forwarder + VarInt::ReadUnsigned(pbDecode); + } + else if (0 != (b & 0xC0)) + { + // nnnldddd pop nnn-1, l = last, dddd = delta (nnn=0 and nnn=1 are disallowed) + last = ((b & 0x10) == 0x10); + } + } + } +#elif defined(TARGET_ARM) + arm_parmRegsPushedSet = 0; + arm_vfpRegPushedCount = 0; + arm_vfpRegFirstPushed = 0; + if (arm_areParmOrVfpRegsPushed) + { + UInt32 encodedValue = VarInt::ReadUnsigned(pbDecode); + arm_parmRegsPushedSet = encodedValue & 0x0f; + arm_vfpRegPushedCount = (UInt8)(encodedValue >> 4); + UInt32 vfpRegFirstPushed = encodedValue >> (8 + 4); + if (vfpRegFirstPushed == 0) + arm_vfpRegFirstPushed = 8; + else + arm_vfpRegFirstPushed = (UInt8)(vfpRegFirstPushed - 1); + } +#elif defined(TARGET_ARM64) + calleeSavedRegMask = arm64_calleeSavedRegMaskLow; + if (arm64_longCsrMask) + { + calleeSavedRegMask |= (*pbDecode++ << NUM_PRESERVED_REGS_LOW); + } + + arm64_parmRegsPushedCount = 0; + arm64_vfpRegsPushedMask = 0; + if (arm64_areParmOrVfpRegsPushed) + { + UInt32 encodedValue = VarInt::ReadUnsigned(pbDecode); + arm64_vfpRegsPushedMask = (UInt8)encodedValue; + arm64_parmRegsPushedCount = (UInt8)(encodedValue >> 8); + ASSERT(arm64_parmRegsPushedCount <= 8); + } +#endif + + extraDataHeader = hasExtraData ? ToUInt8(VarInt::ReadUnsigned(pbDecode)) : 0; + paramPointerReg = HasDynamicAlignment() ? ToUInt8(VarInt::ReadUnsigned(pbDecode)) : (UInt8)RN_NONE; + gsCookieOffset = hasGSCookie ? VarInt::ReadUnsigned(pbDecode) : 0; + + epilogCount = epilogCountSmall < EC_MaxEpilogCountSmall ? epilogCountSmall : ToUInt16(VarInt::ReadUnsigned(pbDecode)); + + this->funcletOffset = 0; + if (hasFunclets) + { + // WORKAROUND: Epilog tables are still per-method instead of per-funclet, but we don't deal with + // them here. So we will simply overwrite the funclet's epilogAtEnd and epilogCount + // with the values from the main code body -- these were the values used to generate + // the per-method epilog table, so at least we're consistent with what is encoded. + UInt8 mainEpilogAtEnd = epilogAtEnd; + UInt16 mainEpilogCount = epilogCount; + UInt16 mainFixedEpilogSize = fixedEpilogSize; // Either in bytes or in instructions + UInt8 mainHasCommonVars = hasCommonVars; + // ------- + + int nFunclets = (int)VarInt::ReadUnsigned(pbDecode); + int idxFunclet = -2; + UInt32 offsetFunclet = 0; + // Decode the funclet start offsets, remembering which one is of interest. 
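The loop just below walks the VarInt-encoded deltas; the following is a simplified standalone model of the same search that uses a plain array in place of the VarInt stream (illustrative names only):

#include <cstdint>
#include <vector>

// Funclet start offsets are stored as deltas from the previous start. Given a code
// offset, return the index of the funclet that contains it (-1 for the main body) and
// the absolute start offset of that funclet.
static int FindFuncletIndex(const std::vector<uint32_t>& startDeltas, uint32_t methodOffset, uint32_t& funcletStart)
{
    int idx = -1;                // stays -1 if methodOffset precedes the first funclet
    uint32_t prevStart = 0;
    funcletStart = 0;
    for (size_t i = 0; i < startDeltas.size(); i++)
    {
        uint32_t thisStart = prevStart + startDeltas[i];
        if (methodOffset < thisStart)
            return idx;          // offset lies before this funclet, so it belongs to the previous region
        idx = (int)i;
        funcletStart = thisStart;
        prevStart = thisStart;
    }
    return idx;                  // offset is inside (or past) the last funclet
}

With deltas {0x40, 0x60} (funclets starting at 0x40 and 0xA0), an offset of 0x50 resolves to funclet 0 with a start of 0x40, which matches what the idxFunclet/offsetFunclet bookkeeping below computes.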
+ UInt32 prevFuncletStart = 0; + for (int i = 0; i < nFunclets; i++) + { + UInt32 offsetThisFunclet = prevFuncletStart + VarInt::ReadUnsigned(pbDecode); + if ((idxFunclet == -2) && (methodOffset < offsetThisFunclet)) + { + idxFunclet = (i - 1); + offsetFunclet = prevFuncletStart; + } + prevFuncletStart = offsetThisFunclet; + } + if ((idxFunclet == -2) && (methodOffset >= prevFuncletStart)) + { + idxFunclet = (nFunclets - 1); + offsetFunclet = prevFuncletStart; + } + + // Now decode headers until we find the one we want. Keep decoding if we need to report a size. + if (pcbHeader || (idxFunclet >= 0)) + { + for (int i = 0; i < nFunclets; i++) + { + size_t hdrSize; + if (i == idxFunclet) + { + this->DecodeHeader(methodOffset, pbDecode, &hdrSize); + pbDecode += hdrSize; + this->funcletOffset = offsetFunclet; + if (!pcbHeader) // if nobody is going to look at the header size, we don't need to keep going + break; + } + else + { + // keep decoding into a temp just to get the right header size + GCInfoHeader tmp; + tmp.DecodeHeader(methodOffset, pbDecode, &hdrSize); + pbDecode += hdrSize; + } + } + } + + // WORKAROUND: see above + this->epilogAtEnd = mainEpilogAtEnd; + this->epilogCount = mainEpilogCount; + this->PokeFixedEpilogSize(mainFixedEpilogSize); + this->hasCommonVars = mainHasCommonVars; + + // ------- + } + + // WARNING: + // WARNING: Do not add fields to the file-format after the funclet header encodings -- these are + // WARNING: decoded recursively and 'in-place' when looking for the info associated with a funclet. + // WARNING: Therefore, in that case, we cannot easily continue to decode things associated with the + // WARNING: main body GCInfoHeader once we start this recursive decode. + // WARNING: + + if (pcbHeader) + *pcbHeader = pbDecode - pbHeaderEncoding; + + return pbStackChangeString; + } + + void GetFuncletInfo(PTR_UInt8 pbHeaderEncoding, UInt32* pnFuncletsOut, PTR_UInt8* pEncodedFuncletStartOffsets) + { + ASSERT(hasFunclets); + + PTR_UInt8 pbDecode = pbHeaderEncoding + EC_SizeOfFixedHeader; + if (hasFrameSize) { VarInt::SkipUnsigned(pbDecode); } + if (returnKind == MRK_ReturnsToNative) { VarInt::SkipUnsigned(pbDecode); } + if (hasExtraData) { VarInt::SkipUnsigned(pbDecode); } + if (HasDynamicAlignment()) { VarInt::SkipUnsigned(pbDecode); } + if (hasGSCookie) { VarInt::SkipUnsigned(pbDecode); } + +#ifdef TARGET_AMD64 + if (x64_framePtrOffsetSmall == 0x3) { VarInt::SkipUnsigned(pbDecode); } +#elif defined(TARGET_X86) + if (x86_argCountIsLarge) + pbDecode++; + + if (x86_hasStackChanges) + { + bool last = false; + while (!last) + { + UInt8 b = *pbDecode++; + // 00111111 {delta} forwarder + // 00dddddd push 1, dddddd = delta + // nnnldddd pop nnn-1, l = last, dddd = delta (nnn=0 and nnn=1 are disallowed) + if (b == 0x3F) + { + // 00111111 {delta} forwarder + VarInt::SkipUnsigned(pbDecode); + } + else if (0 != (b & 0xC0)) + { + // nnnldddd pop nnn-1, l = last, dddd = delta (nnn=0 and nnn=1 are disallowed) + last = ((b & 0x10) == 0x10); + } + } + } +#elif defined(TARGET_ARM) + if (arm_areParmOrVfpRegsPushed) { VarInt::SkipUnsigned(pbDecode); } +#elif defined(TARGET_ARM64) + if (arm64_longCsrMask) { pbDecode++; } + if (arm64_areParmOrVfpRegsPushed) { VarInt::SkipUnsigned(pbDecode); } +#endif + + *pnFuncletsOut = VarInt::ReadUnsigned(pbDecode); + *pEncodedFuncletStartOffsets = pbDecode; + } + + bool IsValidEpilogOffset(UInt32 epilogOffset, UInt32 epilogSize) + { + if (!this->HasVaryingEpilogSizes()) + return (epilogOffset < this->GetFixedEpilogSize()); + else + return (epilogOffset 
< epilogSize); + } +}; + +/*****************************************************************************/ +#endif //_GCINFO_H_ +/*****************************************************************************/ diff --git a/src/coreclr/src/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/src/nativeaot/Runtime/inc/rhbinder.h new file mode 100644 index 0000000000000..480aa8e8ce7a6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/rhbinder.h @@ -0,0 +1,664 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This header contains binder-generated data structures that the runtime consumes. +// +#include "TargetPtrs.h" + +class GcPollInfo +{ +public: + static const UInt32 indirCellsPerBitmapBit = 64 / POINTER_SIZE; // one cache line per bit + + static const UInt32 cbChunkCommonCode_X64 = 17; + static const UInt32 cbChunkCommonCode_X86 = 16; + static const UInt32 cbChunkCommonCode_ARM = 32; +#ifdef TARGET_ARM + // on ARM, the index of the indirection cell can be computed + // from the pointer to the indirection cell left in R12, + // thus we need only one entry point on ARM, + // thus entries take no space, and you can have as many as you want + static const UInt32 cbEntry = 0; + static const UInt32 cbBundleCommonCode = 0; + static const UInt32 entriesPerBundle = 0x7fffffff; + static const UInt32 bundlesPerChunk = 0x7fffffff; + static const UInt32 entriesPerChunk = 0x7fffffff; +#else + static const UInt32 cbEntry = 4; // push imm8 / jmp rel8 + static const UInt32 cbBundleCommonCode = 5; // jmp rel32 + + static const UInt32 entriesPerSubBundlePos = 32; // for the half with forward jumps + static const UInt32 entriesPerSubBundleNeg = 30; // for the half with negative jumps + static const UInt32 entriesPerBundle = entriesPerSubBundlePos + entriesPerSubBundleNeg; + static const UInt32 bundlesPerChunk = 4; + static const UInt32 entriesPerChunk = bundlesPerChunk * entriesPerBundle; +#endif + + static const UInt32 cbFullBundle = cbBundleCommonCode + + (entriesPerBundle * cbEntry); + + static UInt32 EntryIndexToStubOffset(UInt32 entryIndex) + { +# if defined(TARGET_ARM) + return EntryIndexToStubOffset(entryIndex, cbChunkCommonCode_ARM); +# elif defined(TARGET_AMD64) + return EntryIndexToStubOffset(entryIndex, cbChunkCommonCode_X64); +# else + return EntryIndexToStubOffset(entryIndex, cbChunkCommonCode_X86); +# endif + } + + static UInt32 EntryIndexToStubOffset(UInt32 entryIndex, UInt32 cbChunkCommonCode) + { +# if defined(TARGET_ARM) + UNREFERENCED_PARAMETER(entryIndex); + UNREFERENCED_PARAMETER(cbChunkCommonCode); + + return 0; +# else + UInt32 cbFullChunk = cbChunkCommonCode + + (bundlesPerChunk * cbBundleCommonCode) + + (entriesPerChunk * cbEntry); + + UInt32 numFullChunks = entryIndex / entriesPerChunk; + UInt32 numEntriesInLastChunk = entryIndex - (numFullChunks * entriesPerChunk); + + UInt32 numFullBundles = numEntriesInLastChunk / entriesPerBundle; + UInt32 numEntriesInLastBundle = numEntriesInLastChunk - (numFullBundles * entriesPerBundle); + + UInt32 offset = (numFullChunks * cbFullChunk) + + cbChunkCommonCode + + (numFullBundles * cbFullBundle) + + (numEntriesInLastBundle * cbEntry); + + if (numEntriesInLastBundle >= entriesPerSubBundlePos) + offset += cbBundleCommonCode; + + return offset; +# endif + } +}; + +struct StaticGcDesc +{ + struct GCSeries + { + UInt32 m_size; + UInt32 m_startOffset; + }; + + UInt32 m_numSeries; + GCSeries m_series[1]; + + UInt32 GetSize() + { + return 
(UInt32)(offsetof(StaticGcDesc, m_series) + (m_numSeries * sizeof(GCSeries))); + } + +#ifdef DACCESS_COMPILE + static UInt32 DacSize(TADDR addr); +#endif +}; + +typedef SPTR(StaticGcDesc) PTR_StaticGcDesc; +typedef DPTR(StaticGcDesc::GCSeries) PTR_StaticGcDescGCSeries; + +class EEType; + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +enum class DispatchCellType +{ + InterfaceAndSlot = 0x0, + MetadataToken = 0x1, + VTableOffset = 0x2, +}; + +struct DispatchCellInfo +{ + DispatchCellType CellType; + EEType *InterfaceType = nullptr; + UInt16 InterfaceSlot = 0; + UInt8 HasCache = 0; + UInt32 MetadataToken = 0; + UInt32 VTableOffset = 0; +}; + +struct InterfaceDispatchCacheHeader +{ +private: + enum Flags + { + CH_TypeAndSlotIndex = 0x0, + CH_MetadataToken = 0x1, + CH_Mask = 0x3, + CH_Shift = 0x2, + }; + +public: + void Initialize(EEType *pInterfaceType, UInt16 interfaceSlot, UInt32 metadataToken) + { + if (pInterfaceType != nullptr) + { + ASSERT(metadataToken == 0); + m_pInterfaceType = pInterfaceType; + m_slotIndexOrMetadataTokenEncoded = CH_TypeAndSlotIndex | (((UInt32)interfaceSlot) << CH_Shift); + } + else + { + ASSERT(pInterfaceType == nullptr); + ASSERT(interfaceSlot == 0); + m_pInterfaceType = nullptr; + m_slotIndexOrMetadataTokenEncoded = CH_MetadataToken | (metadataToken << CH_Shift); + } + } + + void Initialize(const DispatchCellInfo *pCellInfo) + { + ASSERT((pCellInfo->CellType == DispatchCellType::InterfaceAndSlot) || + (pCellInfo->CellType == DispatchCellType::MetadataToken)); + if (pCellInfo->CellType == DispatchCellType::InterfaceAndSlot) + { + ASSERT(pCellInfo->MetadataToken == 0); + Initialize(pCellInfo->InterfaceType, pCellInfo->InterfaceSlot, 0); + } + else + { + ASSERT(pCellInfo->CellType == DispatchCellType::MetadataToken); + ASSERT(pCellInfo->InterfaceType == nullptr); + Initialize(nullptr, 0, pCellInfo->MetadataToken); + } + } + + DispatchCellInfo GetDispatchCellInfo() + { + DispatchCellInfo cellInfo; + + if ((m_slotIndexOrMetadataTokenEncoded & CH_Mask) == CH_TypeAndSlotIndex) + { + cellInfo.InterfaceType = m_pInterfaceType; + cellInfo.InterfaceSlot = (UInt16)(m_slotIndexOrMetadataTokenEncoded >> CH_Shift); + cellInfo.CellType = DispatchCellType::InterfaceAndSlot; + } + else + { + cellInfo.MetadataToken = m_slotIndexOrMetadataTokenEncoded >> CH_Shift; + cellInfo.CellType = DispatchCellType::MetadataToken; + } + cellInfo.HasCache = 1; + return cellInfo; + } + +private: + EEType * m_pInterfaceType; // EEType of interface to dispatch on + UInt32 m_slotIndexOrMetadataTokenEncoded; +}; + +// One of these is allocated per interface call site. It holds the stub to call, data to pass to that stub +// (cache information) and the interface contract, i.e. the interface type and slot being called. +struct InterfaceDispatchCell +{ + // The first two fields must remain together and at the beginning of the structure. This is due to the + // synchronization requirements of the code that updates these at runtime and the instructions generated + // by the binder for interface call sites. + UIntTarget m_pStub; // Call this code to execute the interface dispatch + volatile UIntTarget m_pCache; // Context used by the stub above (one or both of the low two bits are set + // for initial dispatch, and if not set, using this as a cache pointer or + // as a vtable offset.) + // + // In addition, there is a Slot/Flag use of this field. 
DispatchCells are + // emitted as a group, and the final one in the group (identified by m_pStub + // having the null value) will have a Slot field is the low 16 bits of the + // m_pCache field, and in the second lowest 16 bits, a Flags field. For the interface + // case Flags shall be 0, and for the metadata token case, Flags shall be 1. + + // + // Keep these in sync with the managed copy in src\Common\src\Internal\Runtime\InterfaceCachePointerType.cs + // + enum Flags + { + // The low 2 bits of the m_pCache pointer are treated specially so that we can avoid the need for + // extra fields on this type. + // OR if the m_pCache value is less than 0x1000 then this it is a vtable offset and should be used as such + IDC_CachePointerIsInterfaceRelativePointer = 0x3, + IDC_CachePointerIsIndirectedInterfaceRelativePointer = 0x2, + IDC_CachePointerIsInterfacePointerOrMetadataToken = 0x1, // Metadata token is a 30 bit number in this case. + // Tokens are required to have at least one of their upper 20 bits set + // But they are not required by this part of the system to follow any specific + // token format + IDC_CachePointerPointsAtCache = 0x0, + IDC_CachePointerMask = 0x3, + IDC_CachePointerMaskShift = 0x2, + IDC_MaxVTableOffsetPlusOne = 0x1000, + }; + + DispatchCellInfo GetDispatchCellInfo() + { + // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be + // modified on another thread while this function is executing.) + UIntTarget cachePointerValue = m_pCache; + DispatchCellInfo cellInfo; + + if ((cachePointerValue < IDC_MaxVTableOffsetPlusOne) && ((cachePointerValue & IDC_CachePointerMask) == IDC_CachePointerPointsAtCache)) + { + cellInfo.VTableOffset = (UInt32)cachePointerValue; + cellInfo.CellType = DispatchCellType::VTableOffset; + cellInfo.HasCache = 1; + return cellInfo; + } + + // If there is a real cache pointer, grab the data from there. + if ((cachePointerValue & IDC_CachePointerMask) == IDC_CachePointerPointsAtCache) + { + return ((InterfaceDispatchCacheHeader*)cachePointerValue)->GetDispatchCellInfo(); + } + + // Otherwise, walk to cell with Flags and Slot field + + // The slot number/flags for a dispatch cell is encoded once per run of DispatchCells + // The run is terminated by having an dispatch cell with a null stub pointer. 
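GetDispatchCellInfo below applies these rules; as a rough illustration, the following sketch classifies a raw m_pCache value under the same flag scheme (it deliberately omits the relative-pointer resolution and the slot/flags read from the terminating cell):

#include <cstdint>

// Hypothetical classifier for an m_pCache value, mirroring the IDC_* flag values defined below.
enum class CachePointerKind
{
    VTableOffset,                 // low bits 0x0 and value < 0x1000
    CachePointer,                 // low bits 0x0 and value >= 0x1000
    InterfaceOrMetadataToken,     // low bits 0x1
    IndirectedRelativePointer,    // low bits 0x2
    RelativePointer               // low bits 0x3
};

static CachePointerKind ClassifyCachePointer(uintptr_t value)
{
    const uintptr_t cachePointerMask = 0x3;        // IDC_CachePointerMask
    const uintptr_t maxVTableOffsetPlusOne = 0x1000;
    switch (value & cachePointerMask)
    {
    case 0x0:
        return (value < maxVTableOffsetPlusOne) ? CachePointerKind::VTableOffset
                                                : CachePointerKind::CachePointer;
    case 0x1: return CachePointerKind::InterfaceOrMetadataToken;
    case 0x2: return CachePointerKind::IndirectedRelativePointer;
    default:  return CachePointerKind::RelativePointer;
    }
}

So a value such as 0x28 reports a vtable offset, while a pointer-aligned heap address with both low bits clear is treated as a pointer to an InterfaceDispatchCacheHeader.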
+ const InterfaceDispatchCell *currentCell = this; + while (currentCell->m_pStub != 0) + { + currentCell = currentCell + 1; + } + UIntTarget cachePointerValueFlags = currentCell->m_pCache; + + DispatchCellType cellType = (DispatchCellType)(cachePointerValueFlags >> 16); + cellInfo.CellType = cellType; + + if (cellType == DispatchCellType::InterfaceAndSlot) + { + cellInfo.InterfaceSlot = (UInt16)cachePointerValueFlags; + + switch (cachePointerValue & IDC_CachePointerMask) + { + case IDC_CachePointerIsInterfacePointerOrMetadataToken: + cellInfo.InterfaceType = (EEType*)(cachePointerValue & ~IDC_CachePointerMask); + break; + + case IDC_CachePointerIsInterfaceRelativePointer: + case IDC_CachePointerIsIndirectedInterfaceRelativePointer: + { + UIntTarget interfacePointerValue = (UIntTarget)&m_pCache + (Int32)cachePointerValue; + interfacePointerValue &= ~IDC_CachePointerMask; + if ((cachePointerValue & IDC_CachePointerMask) == IDC_CachePointerIsInterfaceRelativePointer) + { + cellInfo.InterfaceType = (EEType*)interfacePointerValue; + } + else + { + cellInfo.InterfaceType = *(EEType**)interfacePointerValue; + } + } + break; + } + } + else + { + cellInfo.MetadataToken = (UInt32)(cachePointerValue >> IDC_CachePointerMaskShift); + } + + return cellInfo; + } + + static bool IsCache(UIntTarget value) + { + if (((value & IDC_CachePointerMask) != 0) || (value < IDC_MaxVTableOffsetPlusOne)) + { + return false; + } + else + { + return true; + } + } + + InterfaceDispatchCacheHeader* GetCache() const + { + // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be + // modified on another thread while this function is executing.) + UIntTarget cachePointerValue = m_pCache; + if (IsCache(cachePointerValue)) + { + return (InterfaceDispatchCacheHeader*)cachePointerValue; + } + else + { + return 0; + } + } +}; + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#ifdef TARGET_ARM +// Note for ARM: try and keep the flags in the low 16-bits, since they're not easy to load into a register in +// a single instruction within our stubs. +enum PInvokeTransitionFrameFlags +{ + // NOTE: Keep in sync with ndp\FxCore\CoreRT\src\Native\Runtime\arm\AsmMacros.h + + // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has + // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp + + // standard preserved registers + PTFF_SAVE_R4 = 0x00000001, + PTFF_SAVE_R5 = 0x00000002, + PTFF_SAVE_R6 = 0x00000004, + PTFF_SAVE_R7 = 0x00000008, // should never be used, we require FP frames for methods with + // pinvoke and it is saved into the frame pointer field instead + PTFF_SAVE_R8 = 0x00000010, + PTFF_SAVE_R9 = 0x00000020, + PTFF_SAVE_R10 = 0x00000040, + PTFF_SAVE_SP = 0x00000100, // Used for 'coop pinvokes' in runtime helper routines. Methods with + // PInvokes are required to have a frame pointers, but methods which + // call runtime helpers are not. Therefore, methods that call runtime + // helpers may need SP to seed the stackwalk. 
+ + // scratch registers + PTFF_SAVE_R0 = 0x00000200, + PTFF_SAVE_R1 = 0x00000400, + PTFF_SAVE_R2 = 0x00000800, + PTFF_SAVE_R3 = 0x00001000, + PTFF_SAVE_LR = 0x00002000, // this is useful for the case of loop hijacking where we need both + // a return address pointing into the hijacked method and that method's + // lr register, which may hold a gc pointer + + PTFF_R0_IS_GCREF = 0x00004000, // used by hijack handler to report return value of hijacked method + PTFF_R0_IS_BYREF = 0x00008000, // used by hijack handler to report return value of hijacked method + + PTFF_THREAD_ABORT = 0x00010000, // indicates that ThreadAbortException should be thrown when returning from the transition +}; +#elif defined(TARGET_ARM64) +enum PInvokeTransitionFrameFlags : UInt64 +{ + // NOTE: Keep in sync with ndp\FxCore\CoreRT\src\Native\Runtime\arm64\AsmMacros.h + + // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has + // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp + + // standard preserved registers + PTFF_SAVE_X19 = 0x0000000000000001, + PTFF_SAVE_X20 = 0x0000000000000002, + PTFF_SAVE_X21 = 0x0000000000000004, + PTFF_SAVE_X22 = 0x0000000000000008, + PTFF_SAVE_X23 = 0x0000000000000010, + PTFF_SAVE_X24 = 0x0000000000000020, + PTFF_SAVE_X25 = 0x0000000000000040, + PTFF_SAVE_X26 = 0x0000000000000080, + PTFF_SAVE_X27 = 0x0000000000000100, + PTFF_SAVE_X28 = 0x0000000000000200, + + PTFF_SAVE_SP = 0x0000000000000400, // Used for 'coop pinvokes' in runtime helper routines. Methods with + // PInvokes are required to have a frame pointers, but methods which + // call runtime helpers are not. Therefore, methods that call runtime + // helpers may need SP to seed the stackwalk. 
+ + // Scratch registers + PTFF_SAVE_X0 = 0x0000000000000800, + PTFF_SAVE_X1 = 0x0000000000001000, + PTFF_SAVE_X2 = 0x0000000000002000, + PTFF_SAVE_X3 = 0x0000000000004000, + PTFF_SAVE_X4 = 0x0000000000008000, + PTFF_SAVE_X5 = 0x0000000000010000, + PTFF_SAVE_X6 = 0x0000000000020000, + PTFF_SAVE_X7 = 0x0000000000040000, + PTFF_SAVE_X8 = 0x0000000000080000, + PTFF_SAVE_X9 = 0x0000000000100000, + PTFF_SAVE_X10 = 0x0000000000200000, + PTFF_SAVE_X11 = 0x0000000000400000, + PTFF_SAVE_X12 = 0x0000000000800000, + PTFF_SAVE_X13 = 0x0000000001000000, + PTFF_SAVE_X14 = 0x0000000002000000, + PTFF_SAVE_X15 = 0x0000000004000000, + PTFF_SAVE_X16 = 0x0000000008000000, + PTFF_SAVE_X17 = 0x0000000010000000, + PTFF_SAVE_X18 = 0x0000000020000000, + + PTFF_SAVE_FP = 0x0000000040000000, // should never be used, we require FP frames for methods with + // pinvoke and it is saved into the frame pointer field instead + + PTFF_SAVE_LR = 0x0000000080000000, // this is useful for the case of loop hijacking where we need both + // a return address pointing into the hijacked method and that method's + // lr register, which may hold a gc pointer + + // used by hijack handler to report return value of hijacked method + PTFF_X0_IS_GCREF = 0x0000000100000000, + PTFF_X0_IS_BYREF = 0x0000000200000000, + PTFF_X1_IS_GCREF = 0x0000000400000000, + PTFF_X1_IS_BYREF = 0x0000000800000000, + + PTFF_THREAD_ABORT = 0x0000001000000000, // indicates that ThreadAbortException should be thrown when returning from the transition +}; + +// TODO: Consider moving the PInvokeTransitionFrameFlags definition to a separate file to simplify header dependencies +#ifdef ICODEMANAGER_INCLUDED +// Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back +C_ASSERT(PTFF_X0_IS_GCREF == ((UInt64)GCRK_Object << 32)); +C_ASSERT(PTFF_X0_IS_BYREF == ((UInt64)GCRK_Byref << 32)); +C_ASSERT(PTFF_X1_IS_GCREF == ((UInt64)GCRK_Scalar_Obj << 32)); +C_ASSERT(PTFF_X1_IS_BYREF == ((UInt64)GCRK_Scalar_Byref << 32)); + +inline UInt64 ReturnKindToTransitionFrameFlags(GCRefKind returnKind) +{ + if (returnKind == GCRK_Scalar) + return 0; + + return PTFF_SAVE_X0 | PTFF_SAVE_X1 | ((UInt64)returnKind << 32); +} + +inline GCRefKind TransitionFrameFlagsToReturnKind(UInt64 transFrameFlags) +{ + GCRefKind returnKind = (GCRefKind)((transFrameFlags & (PTFF_X0_IS_GCREF | PTFF_X0_IS_BYREF | PTFF_X1_IS_GCREF | PTFF_X1_IS_BYREF)) >> 32); + ASSERT((returnKind == GCRK_Scalar) || ((transFrameFlags & PTFF_SAVE_X0) && (transFrameFlags & PTFF_SAVE_X1))); + return returnKind; +} +#endif // ICODEMANAGER_INCLUDED +#else // TARGET_ARM +enum PInvokeTransitionFrameFlags +{ + // NOTE: Keep in sync with ndp\FxCore\CoreRT\src\Native\Runtime\[amd64|i386]\AsmMacros.inc + + // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has + // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp + + // standard preserved registers + PTFF_SAVE_RBX = 0x00000001, + PTFF_SAVE_RSI = 0x00000002, + PTFF_SAVE_RDI = 0x00000004, + PTFF_SAVE_RBP = 0x00000008, // should never be used, we require RBP frames for methods with + // pinvoke and it is saved into the frame pointer field instead + PTFF_SAVE_R12 = 0x00000010, + PTFF_SAVE_R13 = 0x00000020, + PTFF_SAVE_R14 = 0x00000040, + PTFF_SAVE_R15 = 0x00000080, + + PTFF_SAVE_RSP = 0x00008000, // Used for 'coop pinvokes' in runtime helper routines. 
Methods with + // PInvokes are required to have a frame pointers, but methods which + // call runtime helpers are not. Therefore, methods that call runtime + // helpers may need RSP to seed the stackwalk. + // + // NOTE: despite the fact that this flag's bit is out of order, it is + // still expected to be saved here after the preserved registers and + // before the scratch registers + PTFF_SAVE_RAX = 0x00000100, + PTFF_SAVE_RCX = 0x00000200, + PTFF_SAVE_RDX = 0x00000400, + PTFF_SAVE_R8 = 0x00000800, + PTFF_SAVE_R9 = 0x00001000, + PTFF_SAVE_R10 = 0x00002000, + PTFF_SAVE_R11 = 0x00004000, + + PTFF_RAX_IS_GCREF = 0x00010000, // used by hijack handler to report return value of hijacked method + PTFF_RAX_IS_BYREF = 0x00020000, // used by hijack handler to report return value of hijacked method + + PTFF_THREAD_ABORT = 0x00040000, // indicates that ThreadAbortException should be thrown when returning from the transition +}; +#endif // TARGET_ARM + +#pragma warning(push) +#pragma warning(disable:4200) // nonstandard extension used: zero-sized array in struct/union +class Thread; +#if defined(USE_PORTABLE_HELPERS) +//the members of this structure are currently unused except m_pThread and exist only to allow compilation +//of StackFrameIterator their values are not currently being filled in and will require significant rework +//in order to satisfy the runtime requirements of StackFrameIterator +struct PInvokeTransitionFrame +{ + void* m_RIP; + Thread* m_pThread; // unused by stack crawler, this is so GetThread is only called once per method + // can be an invalid pointer in universal transition cases (which never need to call GetThread) + uint32_t m_Flags; // PInvokeTransitionFrameFlags +}; +#else // USE_PORTABLE_HELPERS +struct PInvokeTransitionFrame +{ +#ifdef TARGET_ARM + TgtPTR_Void m_ChainPointer; // R11, used by OS to walk stack quickly +#endif +#ifdef TARGET_ARM64 + // On arm64, the FP and LR registers are pushed in that order when setting up frames + TgtPTR_Void m_FramePointer; + TgtPTR_Void m_RIP; +#else + TgtPTR_Void m_RIP; + TgtPTR_Void m_FramePointer; +#endif + TgtPTR_Thread m_pThread; // unused by stack crawler, this is so GetThread is only called once per method + // can be an invalid pointer in universal transition cases (which never need to call GetThread) +#ifdef TARGET_ARM64 + UInt64 m_Flags; // PInvokeTransitionFrameFlags +#else + UInt32 m_Flags; // PInvokeTransitionFrameFlags +#endif + UIntTarget m_PreservedRegs[]; +}; +#endif // USE_PORTABLE_HELPERS +#pragma warning(pop) + +#ifdef TARGET_AMD64 +// RBX, RSI, RDI, R12, R13, R14, R15, RAX, RSP +#define PInvokeTransitionFrame_SaveRegs_count 9 +#elif defined(TARGET_X86) +// RBX, RSI, RDI, RAX, RSP +#define PInvokeTransitionFrame_SaveRegs_count 5 +#elif defined(TARGET_ARM) +// R4-R10, R0, SP +#define PInvokeTransitionFrame_SaveRegs_count 9 +#endif +#define PInvokeTransitionFrame_MAX_SIZE (sizeof(PInvokeTransitionFrame) + (POINTER_SIZE * PInvokeTransitionFrame_SaveRegs_count)) + +#ifdef TARGET_AMD64 +#define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#elif defined(TARGET_ARM64) +#define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#elif defined(TARGET_X86) +#define OFFSETOF__Thread__m_pTransitionFrame 0x2c +#elif defined(TARGET_ARM) +#define OFFSETOF__Thread__m_pTransitionFrame 0x2c +#endif + +typedef DPTR(EEType) PTR_EEType; +typedef DPTR(PTR_EEType) PTR_PTR_EEType; + +struct EETypeRef +{ + union + { + EEType * pEEType; + EEType ** ppEEType; + UInt8 * rawPtr; + UIntTarget rawTargetPtr; // x86_amd64: keeps union big enough for 
target-platform pointer + }; + + static const size_t DOUBLE_INDIR_FLAG = 1; + + PTR_EEType GetValue() + { + if (dac_cast(rawTargetPtr) & DOUBLE_INDIR_FLAG) + return *dac_cast(rawTargetPtr - DOUBLE_INDIR_FLAG); + else + return dac_cast(rawTargetPtr); + } +}; + +// Blobs are opaque data passed from the compiler, through the binder and into the native image. At runtime we +// provide a simple API to retrieve these blobs (they're keyed by a simple integer ID). Blobs are passed to +// the binder from the compiler and stored in native images by the binder in a sequential stream, each blob +// having the following header. +struct BlobHeader +{ + UInt32 m_flags; // Flags describing the blob (used by the binder only at the moment) + UInt32 m_id; // Unique identifier of the blob (used to access the blob at runtime) + // also used by BlobTypeFieldPreInit to identify (at bind time) which field to pre-init. + UInt32 m_size; // Size of the individual blob excluding this header (DWORD aligned) +}; + +// Structure used in the runtime initialization of deferred static class constructors. Deferred here means +// executed during normal code execution just prior to a static field on the type being accessed (as opposed +// to eager cctors, which are run at module load time). This is the fixed portion of the context structure, +// class libraries can add their own fields to the end. +struct StaticClassConstructionContext +{ + // Pointer to the code for the static class constructor method. This is initialized by the + // binder/runtime. + TgtPTR_Void m_cctorMethodAddress; + + // Initialization state of the class. This is initialized to 0. Every time managed code checks the + // cctor state the runtime will call the classlibrary's CheckStaticClassConstruction with this context + // structure unless initialized == 1. This check is specific to allow the classlibrary to store more + // than a binary state for each cctor if it so desires. + Int32 m_initialized; +}; + +#ifdef FEATURE_CUSTOM_IMPORTS +struct CustomImportDescriptor +{ + UInt32 RvaEATAddr; // RVA of the indirection cell of the address of the EAT for that module + UInt32 RvaIAT; // RVA of IAT array for that module + UInt32 CountIAT; // Count of entries in the above array +}; +#endif // FEATURE_CUSTOM_IMPORTS + +enum RhEHClauseKind +{ + RH_EH_CLAUSE_TYPED = 0, + RH_EH_CLAUSE_FAULT = 1, + RH_EH_CLAUSE_FILTER = 2, + RH_EH_CLAUSE_UNUSED = 3 +}; + +#define RH_EH_CLAUSE_TYPED_INDIRECT RH_EH_CLAUSE_UNUSED + +// mapping of cold code blocks to the corresponding hot entry point RVA +// format is a as follows: +// ------------------- +// | subSectionCount | # of subsections, where each subsection has a run of hot bodies +// ------------------- followed by a run of cold bodies +// | hotMethodCount | # of hot bodies in subsection +// | coldMethodCount | # of cold bodies in subsection +// ------------------- +// ... possibly repeated on ARM +// ------------------- +// | hotRVA #1 | RVA of the hot entry point corresponding to the 1st cold body +// | hotRVA #2 | RVA of the hot entry point corresponding to the 2nd cold body +// ... 
one entry for each cold body containing the corresponding hot entry point + +// number of hot and cold bodies in a subsection of code +// in x86 and x64 there's only one subsection, on ARM there may be several +// for large modules with > 16 MB of code +struct SubSectionDesc +{ + UInt32 hotMethodCount; + UInt32 coldMethodCount; +}; + +// this is the structure describing the cold to hot mapping info +struct ColdToHotMapping +{ + UInt32 subSectionCount; + SubSectionDesc subSection[/*subSectionCount*/1]; + // UINT32 hotRVAofColdMethod[/*coldMethodCount*/]; +}; diff --git a/src/coreclr/src/nativeaot/Runtime/inc/stressLog.h b/src/coreclr/src/nativeaot/Runtime/inc/stressLog.h new file mode 100644 index 0000000000000..f657a8de3893a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/stressLog.h @@ -0,0 +1,832 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// --------------------------------------------------------------------------- +// StressLog.h +// +// StressLog infrastructure +// +// The StressLog is a binary, memory based circular queue of logging messages. +// It is intended to be used in retail builds during stress runs (activated +// by registry key), to help find bugs that only turn up during stress runs. +// +// Differently from the desktop implementation the RH implementation of the +// stress log will log all facilities, and only filter on logging level. +// +// The log has a very simple structure, and is meant to be dumped from an NTSD +// extention (eg. strike). +// +// debug\rhsos\stresslogdump.cpp contains the dumper utility that parses this +// log. +// --------------------------------------------------------------------------- + +#ifndef StressLog_h +#define StressLog_h 1 + +#define SUPPRESS_WARNING_4127 \ + __pragma(warning(push)) \ + __pragma(warning(disable:4127)) /* conditional expression is constant*/ + +#define POP_WARNING_STATE \ + __pragma(warning(pop)) + +#define WHILE_0 \ + SUPPRESS_WARNING_4127 \ + while(0) \ + POP_WARNING_STATE \ + + +// let's keep STRESS_LOG defined always... +#if !defined(STRESS_LOG) && !defined(NO_STRESS_LOG) +#define STRESS_LOG +#endif + +#if defined(STRESS_LOG) + +// +// Logging levels and facilities +// +#define DEFINE_LOG_FACILITY(logname, value) logname = value, + +enum LogFacilitiesEnum: unsigned int { +#include "loglf.h" + LF_ALWAYS = 0x80000000u, // Log message irrepespective of LogFacility (if the level matches) + LF_ALL = 0xFFFFFFFFu, // Used only to mask bits. 
Never use as LOG((LF_ALL, ...)) +}; + + +#define LL_EVERYTHING 10 +#define LL_INFO1000000 9 // can be expected to generate 1,000,000 logs per small but not trival run +#define LL_INFO100000 8 // can be expected to generate 100,000 logs per small but not trival run +#define LL_INFO10000 7 // can be expected to generate 10,000 logs per small but not trival run +#define LL_INFO1000 6 // can be expected to generate 1,000 logs per small but not trival run +#define LL_INFO100 5 // can be expected to generate 100 logs per small but not trival run +#define LL_INFO10 4 // can be expected to generate 10 logs per small but not trival run +#define LL_WARNING 3 +#define LL_ERROR 2 +#define LL_FATALERROR 1 +#define LL_ALWAYS 0 // impossible to turn off (log level never negative) + +// +// +// + +#ifndef _ASSERTE +#define _ASSERTE(expr) +#endif + + +#ifndef DACCESS_COMPILE + + +//========================================================================================== +// The STRESS_LOG* macros +// +// The STRESS_LOG* macros work like printf. In fact the use printf in their implementation +// so all printf format specifications work. In addition the Stress log dumper knows +// about certain suffixes for the %p format specification (normally used to print a pointer) +// +// %pM // The pointer is a MethodInfo -- not supported yet (use %pK instead) +// %pT // The pointer is a type (EEType) +// %pV // The pointer is a C++ Vtable pointer +// %pK // The pointer is a code address (used for call stacks or method names) +// + +// STRESS_LOG_VA was added to allow sending GC trace output to the stress log. msg must be enclosed +// in ()'s and contain a format string followed by 0 - 4 arguments. The arguments must be numbers or +// string literals. LogMsgOL is overloaded so that all of the possible sets of parameters are covered. 
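As a quick illustration of the shape of these macros, here is a hypothetical call site; the variable names are placeholders and the facility/level constants come from this header and loglf.h:

// Hypothetical call sites only; pObject, pEEType and bytesReclaimed are placeholders.
static void ExampleStressLogging(void* pObject, void* pEEType, int bytesReclaimed)
{
    // The fixed-arity form: the macro first checks StressLogOn(LF_GC, LL_INFO100), then
    // forwards each argument as a void*; "%pT" tells the dumper the pointer is an EEType.
    STRESS_LOG2(LF_GC, LL_INFO100, "allocated object %p of type %pT\n", pObject, pEEType);

    // STRESS_LOG_VA takes the whole (format, args...) list as one parenthesized argument
    // and routes it through the LogMsgOL overloads.
    STRESS_LOG_VA(("sweep reclaimed %d bytes\n", bytesReclaimed));
}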
+// This was done becasue GC Trace uses dprintf which dosen't contain info on how many arguments are +// getting passed in and using va_args would require parsing the format string during the GC +// + +#define STRESS_LOG_VA(msg) do { \ + if (StressLog::StressLogOn(LF_GC, LL_ALWAYS)) \ + StressLog::LogMsgOL msg; \ + } WHILE_0 + +#define STRESS_LOG0(facility, level, msg) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 0, msg); \ + } WHILE_0 \ + +#define STRESS_LOG1(facility, level, msg, data1) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 1, msg, (void*)(size_t)(data1)); \ + } WHILE_0 + +#define STRESS_LOG2(facility, level, msg, data1, data2) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 2, msg, \ + (void*)(size_t)(data1), (void*)(size_t)(data2)); \ + } WHILE_0 + +#define STRESS_LOG3(facility, level, msg, data1, data2, data3) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 3, msg, \ + (void*)(size_t)(data1),(void*)(size_t)(data2),(void*)(size_t)(data3)); \ + } WHILE_0 + +#define STRESS_LOG4(facility, level, msg, data1, data2, data3, data4) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 4, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4)); \ + } WHILE_0 + +#define STRESS_LOG5(facility, level, msg, data1, data2, data3, data4, data5) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 5, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5)); \ + } WHILE_0 + +#define STRESS_LOG6(facility, level, msg, data1, data2, data3, data4, data5, data6) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 6, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5), (void*)(size_t)(data6)); \ + } WHILE_0 + +#define STRESS_LOG7(facility, level, msg, data1, data2, data3, data4, data5, data6, data7) do { \ + if (StressLog::StressLogOn(facility, level)) \ + StressLog::LogMsg(facility, 7, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5), (void*)(size_t)(data6), (void*)(size_t)(data7)); \ + } WHILE_0 + +#define STRESS_LOG_COND0(facility, level, msg) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 0, msg); \ + } WHILE_0 + +#define STRESS_LOG_COND1(facility, level, cond, msg, data1) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 1, msg, (void*)(size_t)(data1)); \ + } WHILE_0 + +#define STRESS_LOG_COND2(facility, level, cond, msg, data1, data2) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 2, msg, \ + (void*)(size_t)(data1), (void*)(size_t)(data2)); \ + } WHILE_0 + +#define STRESS_LOG_COND3(facility, level, cond, msg, data1, data2, data3) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 3, msg, \ + (void*)(size_t)(data1),(void*)(size_t)(data2),(void*)(size_t)(data3)); \ + } WHILE_0 + +#define STRESS_LOG_COND4(facility, level, cond, msg, data1, data2, data3, data4) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 4, msg, (void*)(size_t)(data1), \ 
+ (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4)); \ + } WHILE_0 + +#define STRESS_LOG_COND5(facility, level, cond, msg, data1, data2, data3, data4, data5) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 5, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5)); \ + } WHILE_0 + +#define STRESS_LOG_COND6(facility, level, cond, msg, data1, data2, data3, data4, data5, data6) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 6, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5), (void*)(size_t)(data6)); \ + } WHILE_0 + +#define STRESS_LOG_COND7(facility, level, cond, msg, data1, data2, data3, data4, data5, data6, data7) do { \ + if (StressLog::StressLogOn(facility, level) && (cond)) \ + StressLog::LogMsg(facility, 7, msg, (void*)(size_t)(data1), \ + (void*)(size_t)(data2),(void*)(size_t)(data3),(void*)(size_t)(data4), \ + (void*)(size_t)(data5), (void*)(size_t)(data6), (void*)(size_t)(data7)); \ + } WHILE_0 + +#define STRESS_LOG_RESERVE_MEM(numChunks) do { \ + if (StressLog::StressLogOn(LF_ALL, LL_ALWAYS)) \ + {StressLog::ReserveStressLogChunks (numChunks);} \ + } WHILE_0 + +// !!! WARNING !!! +// !!! DO NOT ADD STRESS_LOG8, as the stress log infrastructure supports a maximum of 7 arguments +// !!! WARNING !!! + +#define STRESS_LOG_PLUG_MOVE(plug_start, plug_end, plug_delta) do { \ + if (StressLog::StressLogOn(LF_GC, LL_INFO1000)) \ + StressLog::LogMsg(LF_GC, 3, ThreadStressLog::gcPlugMoveMsg(), \ + (void*)(size_t)(plug_start), (void*)(size_t)(plug_end), (void*)(size_t)(plug_delta)); \ + } WHILE_0 + +#define STRESS_LOG_ROOT_PROMOTE(root_addr, objPtr, methodTable) do { \ + if (StressLog::StressLogOn(LF_GC|LF_GCROOTS, LL_INFO1000)) \ + StressLog::LogMsg(LF_GC|LF_GCROOTS, 3, ThreadStressLog::gcRootPromoteMsg(), \ + (void*)(size_t)(root_addr), (void*)(size_t)(objPtr), (void*)(size_t)(methodTable)); \ + } WHILE_0 + +#define STRESS_LOG_ROOT_RELOCATE(root_addr, old_value, new_value, methodTable) do { \ + if (StressLog::StressLogOn(LF_GC|LF_GCROOTS, LL_INFO1000) && ((size_t)(old_value) != (size_t)(new_value))) \ + StressLog::LogMsg(LF_GC|LF_GCROOTS, 4, ThreadStressLog::gcRootMsg(), \ + (void*)(size_t)(root_addr), (void*)(size_t)(old_value), \ + (void*)(size_t)(new_value), (void*)(size_t)(methodTable)); \ + } WHILE_0 + +#define STRESS_LOG_GC_START(gcCount, Gen, collectClasses) do { \ + if (StressLog::StressLogOn(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10)) \ + StressLog::LogMsg(LF_GCROOTS|LF_GC|LF_GCALLOC, 3, ThreadStressLog::gcStartMsg(), \ + (void*)(size_t)(gcCount), (void*)(size_t)(Gen), (void*)(size_t)(collectClasses)); \ + } WHILE_0 + +#define STRESS_LOG_GC_END(gcCount, Gen, collectClasses) do { \ + if (StressLog::StressLogOn(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10)) \ + StressLog::LogMsg(LF_GCROOTS|LF_GC|LF_GCALLOC, 3, ThreadStressLog::gcEndMsg(),\ + (void*)(size_t)(gcCount), (void*)(size_t)(Gen), (void*)(size_t)(collectClasses), 0);\ + } WHILE_0 + +#if defined(_DEBUG) +#define MAX_CALL_STACK_TRACE 20 +#define STRESS_LOG_OOM_STACK(size) do { \ + if (StressLog::StressLogOn(LF_ALWAYS, LL_ALWAYS)) \ + { \ + StressLog::LogMsgOL("OOM on alloc of size %x \n", (void*)(size_t)(size)); \ + StressLog::LogCallStack ("OOM"); \ + } \ + } WHILE_0 +#define STRESS_LOG_GC_STACK do { \ + if (StressLog::StressLogOn(LF_GC |LF_GCINFO, LL_ALWAYS)) \ + { \ + 
StressLog::LogMsgOL("GC is triggered \n"); \ + StressLog::LogCallStack ("GC"); \ + } \ + } WHILE_0 +#else //_DEBUG +#define STRESS_LOG_OOM_STACK(size) +#define STRESS_LOG_GC_STACK +#endif //_DEBUG + +#endif // DACCESS_COMPILE + +// +// forward declarations: +// +class CrstStatic; +class Thread; +typedef DPTR(Thread) PTR_Thread; +class StressLog; +typedef DPTR(StressLog) PTR_StressLog; +class ThreadStressLog; +typedef DPTR(ThreadStressLog) PTR_ThreadStressLog; +struct StressLogChunk; +typedef DPTR(StressLogChunk) PTR_StressLogChunk; +struct DacpStressLogEnumCBArgs; + + +//========================================================================================== +// StressLog - per-thread circular queue of stresslog messages +// +class StressLog { +public: +// private: + unsigned facilitiesToLog; // Bitvector of facilities to log (see loglf.h) + unsigned levelToLog; // log level + unsigned MaxSizePerThread; // maximum number of bytes each thread should have before wrapping + unsigned MaxSizeTotal; // maximum memory allowed for stress log + Int32 totalChunk; // current number of total chunks allocated + PTR_ThreadStressLog logs; // the list of logs for every thread. + Int32 deadCount; // count of dead threads in the log + CrstStatic *pLock; // lock + unsigned __int64 tickFrequency; // number of ticks per second + unsigned __int64 startTimeStamp; // start time from when tick counter started + FILETIME startTime; // time the application started + size_t moduleOffset; // Used to compute format strings. + +#ifndef DACCESS_COMPILE +public: + static void Initialize(unsigned facilities, unsigned level, unsigned maxBytesPerThread, + unsigned maxBytesTotal, HANDLE hMod); + // Called at DllMain THREAD_DETACH to recycle thread's logs + static void ThreadDetach(ThreadStressLog *msgs); + static long NewChunk () { return PalInterlockedIncrement (&theLog.totalChunk); } + static long ChunkDeleted () { return PalInterlockedDecrement (&theLog.totalChunk); } + + //the result is not 100% accurate. If multiple threads call this funciton at the same time, + //we could allow the total size be bigger than required. But the memory won't grow forever + //and this is not critical so we don't try to fix the race + static bool AllowNewChunk (long numChunksInCurThread); + + //preallocate Stress log chunks for current thread. The memory we could preallocate is still + //bounded by per thread size limit and total size limit. 
If chunksToReserve is 0, we will try to + //preallocate up to per thread size limit + static bool ReserveStressLogChunks (unsigned int chunksToReserve); + +// private: + static ThreadStressLog* CreateThreadStressLog(Thread * pThread); + static ThreadStressLog* CreateThreadStressLogHelper(Thread * pThread); + +#else // DACCESS_COMPILE +public: + bool Initialize(); + + // Can't refer to the types in sospriv.h because it drags in windows.h + void EnumerateStressMsgs(/*STRESSMSGCALLBACK*/ void* smcb, /*ENDTHREADLOGCALLBACK*/ void* etcb, + void *token); + void EnumStressLogMemRanges(/*STRESSLOGMEMRANGECALLBACK*/ void* slmrcb, void *token); + + // Called while dumping logs after operations are completed, to ensure DAC-caches + // allow the stress logs to be dumped again + void ResetForRead(); + + ThreadStressLog* FindLatestThreadLog() const; + + friend class ClrDataAccess; + +#endif // DACCESS_COMPILE + +#ifndef DACCESS_COMPILE +public: + FORCEINLINE static bool StressLogOn(unsigned /*facility*/, unsigned level) + { + #if defined(DACCESS_COMPILE) + UNREFERENCED_PARAMETER(level); + return FALSE; + #else + // In Redhawk we have rationalized facility codes and have much + // fewer compared to desktop, as such we'll log all facilities and + // limit the filtering to the log level... + return + // (theLog.facilitiesToLog & facility) + // && + (level <= theLog.levelToLog); + #endif + } + + static void LogMsg(unsigned facility, int cArgs, const char* format, ... ); + + // Support functions for STRESS_LOG_VA + // We disable the warning "conversion from 'type' to 'type' of greater size" since everything will + // end up on the stack, and LogMsg will know the size of the variable based on the format string. + #ifdef _MSC_VER + #pragma warning( push ) + #pragma warning( disable : 4312 ) + #endif + static void LogMsgOL(const char* format) + { LogMsg(LF_GC, 0, format); } + + template < typename T1 > + static void LogMsgOL(const char* format, T1 data1) + { + C_ASSERT(sizeof(T1) <= sizeof(void*)); + LogMsg(LF_GC, 1, format, (void*)(size_t)data1); + } + + template < typename T1, typename T2 > + static void LogMsgOL(const char* format, T1 data1, T2 data2) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*)); + LogMsg(LF_GC, 2, format, (void*)(size_t)data1, (void*)(size_t)data2); + } + + template < typename T1, typename T2, typename T3 > + static void LogMsgOL(const char* format, T1 data1, T2 data2, T3 data3) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*) && sizeof(T3) <= sizeof(void*)); + LogMsg(LF_GC, 3, format, (void*)(size_t)data1, (void*)(size_t)data2, (void*)(size_t)data3); + } + + template < typename T1, typename T2, typename T3, typename T4 > + static void LogMsgOL(const char* format, T1 data1, T2 data2, T3 data3, T4 data4) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*) && sizeof(T3) <= sizeof(void*) && sizeof(T4) <= sizeof(void*)); + LogMsg(LF_GC, 4, format, (void*)(size_t)data1, (void*)(size_t)data2, (void*)(size_t)data3, (void*)(size_t)data4); + } + + template < typename T1, typename T2, typename T3, typename T4, typename T5 > + static void LogMsgOL(const char* format, T1 data1, T2 data2, T3 data3, T4 data4, T5 data5) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*) && sizeof(T3) <= sizeof(void*) && sizeof(T4) <= sizeof(void*) && sizeof(T5) <= sizeof(void*)); + LogMsg(LF_GC, 5, format, (void*)(size_t)data1, (void*)(size_t)data2, (void*)(size_t)data3, (void*)(size_t)data4, (void*)(size_t)data5); + } + + 
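The C_ASSERTs in these overloads exist because every argument is packed into one pointer-sized slot of the log record; a hypothetical caller, shown only to illustrate the constraint:

// Hypothetical helper, not part of this header.
static void LogMsgOLWidthExample()
{
    uint32_t condemnedGeneration = 2;
    void* segment = nullptr;
    // Both arguments satisfy sizeof(T) <= sizeof(void*) on every platform.
    StressLog::LogMsgOL("condemned generation %d, segment %p\n", condemnedGeneration, segment);

#ifdef _WIN64
    // A 64-bit payload only fits in a pointer-sized slot on 64-bit builds; on a 32-bit
    // build this call would trip the C_ASSERT at compile time rather than truncate silently.
    uint64_t bytesAllocated = 0x100000000ULL;
    StressLog::LogMsgOL("allocated %p bytes so far\n", bytesAllocated);
#endif
}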
template < typename T1, typename T2, typename T3, typename T4, typename T5, typename T6 > + static void LogMsgOL(const char* format, T1 data1, T2 data2, T3 data3, T4 data4, T5 data5, T6 data6) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*) && sizeof(T3) <= sizeof(void*) && sizeof(T4) <= sizeof(void*) && sizeof(T5) <= sizeof(void*) && sizeof(T6) <= sizeof(void*)); + LogMsg(LF_GC, 6, format, (void*)(size_t)data1, (void*)(size_t)data2, (void*)(size_t)data3, (void*)(size_t)data4, (void*)(size_t)data5, (void*)(size_t)data6); + } + + template < typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7 > + static void LogMsgOL(const char* format, T1 data1, T2 data2, T3 data3, T4 data4, T5 data5, T6 data6, T7 data7) + { + C_ASSERT(sizeof(T1) <= sizeof(void*) && sizeof(T2) <= sizeof(void*) && sizeof(T3) <= sizeof(void*) && sizeof(T4) <= sizeof(void*) && sizeof(T5) <= sizeof(void*) && sizeof(T6) <= sizeof(void*) && sizeof(T7) <= sizeof(void*)); + LogMsg(LF_GC, 7, format, (void*)(size_t)data1, (void*)(size_t)data2, (void*)(size_t)data3, (void*)(size_t)data4, (void*)(size_t)data5, (void*)(size_t)data6, (void*)(size_t)data7); + } + + #ifdef _MSC_VER + #pragma warning( pop ) + #endif + +// We can only log the stacktrace on DEBUG builds! +#ifdef _DEBUG + static void LogCallStack(const char *const callTag); +#endif //_DEBUG + +#endif // DACCESS_COMPILE + +// private: // static variables + static StressLog theLog; // We only have one log, and this is it +}; + + +//========================================================================================== +// Private classes +// + +#if defined(_MSC_VER) +// don't warn about 0 sized array below or unnamed structures +#pragma warning(disable:4200 4201) +#endif + +//========================================================================================== +// StressMsg +// +// The order of fields is important. Keep the prefix length as the first field. 
+// And make sure the timeStamp field is naturally aligned, so we don't waste +// space on 32-bit platforms +// +struct StressMsg { + union { + struct { + UInt32 numberOfArgs : 3; // at most 7 arguments + UInt32 formatOffset : 29; // offset of string in mscorwks + }; + UInt32 fmtOffsCArgs; // for optimized access + }; + UInt32 facility; // facility used to log the entry + unsigned __int64 timeStamp; // time when mssg was logged + void* args[0]; // size given by numberOfArgs + + static const size_t maxArgCnt = 7; + static const size_t maxOffset = 0x20000000; + static size_t maxMsgSize () + { return sizeof(StressMsg) + maxArgCnt*sizeof(void*); } + + friend class ThreadStressLog; + friend class StressLog; +}; + +#ifdef _WIN64 +#define STRESSLOG_CHUNK_SIZE (32 * 1024) +#else //_WIN64 +#define STRESSLOG_CHUNK_SIZE (16 * 1024) +#endif //_WIN64 +#define GC_STRESSLOG_MULTIPLY (5) + +//========================================================================================== +// StressLogChunk +// +// A chunk of contiguous memory containing instances of StressMsg +// +struct StressLogChunk +{ + PTR_StressLogChunk prev; + PTR_StressLogChunk next; + char buf[STRESSLOG_CHUNK_SIZE]; + UInt32 dwSig1; + UInt32 dwSig2; + +#ifndef DACCESS_COMPILE + + StressLogChunk (PTR_StressLogChunk p = NULL, PTR_StressLogChunk n = NULL) + :prev (p), next (n), dwSig1 (0xCFCFCFCF), dwSig2 (0xCFCFCFCF) + {} + +#endif //!DACCESS_COMPILE + + char * StartPtr () + { + return buf; + } + + char * EndPtr () + { + return buf + STRESSLOG_CHUNK_SIZE; + } + + bool IsValid () const + { + return dwSig1 == 0xCFCFCFCF && dwSig2 == 0xCFCFCFCF; + } +}; + +//========================================================================================== +// ThreadStressLog +// +// This class implements a circular stack of variable sized elements +// .The buffer between startPtr-endPtr is used in a circular manner +// to store instances of the variable-sized struct StressMsg. +// The StressMsg are always aligned to endPtr, while the space +// left between startPtr and the last element is 0-padded. +// .curPtr points to the most recently written log message +// .readPtr points to the next log message to be dumped +// .hasWrapped is TRUE while dumping the log, if we had wrapped +// past the endPtr marker, back to startPtr +// The AdvanceRead/AdvanceWrite operations simply update the +// readPtr / curPtr fields. 
thecaller is responsible for reading/writing +// to the corresponding field +class ThreadStressLog { + PTR_ThreadStressLog next; // we keep a linked list of these + uint64_t threadId; // the id for the thread using this buffer + bool isDead; // Is this thread dead + bool readHasWrapped; // set when read ptr has passed chunListTail + bool writeHasWrapped; // set when write ptr has passed chunListHead + StressMsg* curPtr; // where packets are being put on the queue + StressMsg* readPtr; // where we are reading off the queue (used during dumping) + PTR_StressLogChunk chunkListHead; //head of a list of stress log chunks + PTR_StressLogChunk chunkListTail; //tail of a list of stress log chunks + PTR_StressLogChunk curReadChunk; //the stress log chunk we are currently reading + PTR_StressLogChunk curWriteChunk; //the stress log chunk we are currently writing + long chunkListLength; // how many stress log chunks are in this stress log + PTR_Thread pThread; // thread associated with these stress logs + StressMsg * origCurPtr; // this holds the original curPtr before we start the dump + + friend class StressLog; + +#ifndef DACCESS_COMPILE +public: + inline ThreadStressLog (); + inline ~ThreadStressLog (); + + void LogMsg ( UInt32 facility, int cArgs, const char* format, ... ) + { + va_list Args; + va_start(Args, format); + LogMsg (facility, cArgs, format, Args); + } + + void LogMsg ( UInt32 facility, int cArgs, const char* format, va_list Args); + +private: + FORCEINLINE StressMsg* AdvanceWrite(int cArgs); + inline StressMsg* AdvWritePastBoundary(int cArgs); + FORCEINLINE bool GrowChunkList (); + +#else // DACCESS_COMPILE +public: + friend class ClrDataAccess; + + // Called while dumping. Returns true after all messages in log were dumped + FORCEINLINE bool CompletedDump (); + +private: + FORCEINLINE bool IsReadyForRead() { return readPtr != NULL; } + FORCEINLINE StressMsg* AdvanceRead(); + inline StressMsg* AdvReadPastBoundary(); +#endif //!DACCESS_COMPILE + +public: + void Activate (Thread * pThread); + + bool IsValid () const + { + return chunkListHead != NULL && (!curWriteChunk || curWriteChunk->IsValid ()); + } + + static const char* gcStartMsg() + { + return "{ =========== BEGINGC %d, (requested generation = %lu, collect_classes = %lu) ==========\n"; + } + + static const char* gcEndMsg() + { + return "========== ENDGC %d (gen = %lu, collect_classes = %lu) ===========}\n"; + } + + static const char* gcRootMsg() + { + return " GC Root %p RELOCATED %p -> %p MT = %pT\n"; + } + + static const char* gcRootPromoteMsg() + { + return " GCHeap::Promote: Promote GC Root *%p = %p MT = %pT\n"; + } + + static const char* gcPlugMoveMsg() + { + return "GC_HEAP RELOCATING Objects in heap within range [%p %p) by -0x%x bytes\n"; + } + +}; + + +//========================================================================================== +// Inline implementations: +// + +#ifdef DACCESS_COMPILE + +//------------------------------------------------------------------------------------------ +// Called while dumping. 
Returns true after all messages in log were dumped +FORCEINLINE bool ThreadStressLog::CompletedDump () +{ + return readPtr->timeStamp == 0 + //if read has passed end of list but write has not passed head of list yet, we are done + //if write has also wrapped, we are at the end if read pointer passed write pointer + || (readHasWrapped && + (!writeHasWrapped || (curReadChunk == curWriteChunk && readPtr >= curPtr))); +} + +//------------------------------------------------------------------------------------------ +// Called when dumping the log (by StressLog::Dump()) +// Updates readPtr to point to next stress messaage to be dumped +inline StressMsg* ThreadStressLog::AdvanceRead() { + // advance the marker + readPtr = (StressMsg*)((char*)readPtr + sizeof(StressMsg) + readPtr->numberOfArgs*sizeof(void*)); + // wrap around if we need to + if (readPtr >= (StressMsg *)curReadChunk->EndPtr ()) + { + AdvReadPastBoundary(); + } + return readPtr; +} + +//------------------------------------------------------------------------------------------ +// The factored-out slow codepath for AdvanceRead(), only called by AdvanceRead(). +// Updates readPtr to and returns the first stress message >= startPtr +inline StressMsg* ThreadStressLog::AdvReadPastBoundary() { + //if we pass boundary of tail list, we need to set has Wrapped + if (curReadChunk == chunkListTail) + { + readHasWrapped = true; + //If write has not wrapped, we know the contents from list head to + //cur pointer is garbage, we don't need to read them + if (!writeHasWrapped) + { + return readPtr; + } + } + curReadChunk = curReadChunk->next; + void** p = (void**)curReadChunk->StartPtr(); + while (*p == NULL && (size_t)(p-(void**)curReadChunk->StartPtr ()) < (StressMsg::maxMsgSize()/sizeof(void*))) + { + ++p; + } + // if we failed to find a valid start of a StressMsg fallback to startPtr (since timeStamp==0) + if (*p == NULL) + { + p = (void**) curReadChunk->StartPtr (); + } + readPtr = (StressMsg*)p; + + return readPtr; +} + +#else // DACCESS_COMPILE + +//------------------------------------------------------------------------------------------ +// Initialize a ThreadStressLog +inline ThreadStressLog::ThreadStressLog() +{ + chunkListHead = chunkListTail = curWriteChunk = NULL; + StressLogChunk * newChunk = new (nothrow) StressLogChunk; + //OOM or in cantalloc region + if (newChunk == NULL) + { + return; + } + StressLog::NewChunk (); + + newChunk->prev = newChunk; + newChunk->next = newChunk; + + chunkListHead = chunkListTail = newChunk; + + next = NULL; + isDead = TRUE; + curPtr = NULL; + readPtr = NULL; + writeHasWrapped = FALSE; + curReadChunk = NULL; + curWriteChunk = NULL; + chunkListLength = 1; + origCurPtr = NULL; +} + +inline ThreadStressLog::~ThreadStressLog () +{ + //no thing to do if the list is empty (failed to initialize) + if (chunkListHead == NULL) + { + return; + } + + StressLogChunk * chunk = chunkListHead; + + do + { + StressLogChunk * tmp = chunk; + chunk = chunk->next; + delete tmp; + StressLog::ChunkDeleted (); + } while (chunk != chunkListHead); +} + +//------------------------------------------------------------------------------------------ +// Called when logging, checks if we can increase the number of stress log chunks associated +// with the current thread +FORCEINLINE bool ThreadStressLog::GrowChunkList () +{ + _ASSERTE (chunkListLength >= 1); + if (!StressLog::AllowNewChunk (chunkListLength)) + { + return FALSE; + } + StressLogChunk * newChunk = new (nothrow) StressLogChunk (chunkListTail, chunkListHead); + if 
(newChunk == NULL) + { + return FALSE; + } + StressLog::NewChunk (); + chunkListLength++; + chunkListHead->prev = newChunk; + chunkListTail->next = newChunk; + chunkListHead = newChunk; + + return TRUE; +} + +//------------------------------------------------------------------------------------------ +// Called at runtime when writing the log (by StressLog::LogMsg()) +// Updates curPtr to point to the next spot in the log where we can write +// a stress message with cArgs arguments +// For convenience it returns a pointer to the empty slot where we can +// write the next stress message. +// cArgs is the number of arguments in the message to be written. +inline StressMsg* ThreadStressLog::AdvanceWrite(int cArgs) { + // _ASSERTE(cArgs <= StressMsg::maxArgCnt); + // advance the marker + StressMsg* p = (StressMsg*)((char*)curPtr - sizeof(StressMsg) - cArgs*sizeof(void*)); + + //past start of current chunk + //wrap around if we need to + if (p < (StressMsg*)curWriteChunk->StartPtr ()) + { + curPtr = AdvWritePastBoundary(cArgs); + } + else + { + curPtr = p; + } + + return curPtr; +} + +//------------------------------------------------------------------------------------------ +// This is the factored-out slow codepath for AdvanceWrite() and is only called by +// AdvanceWrite(). +// Returns the stress message flushed against endPtr +// In addition it writes NULLs b/w the startPtr and curPtr +inline StressMsg* ThreadStressLog::AdvWritePastBoundary(int cArgs) { + //zeroed out remaining buffer + memset (curWriteChunk->StartPtr (), 0, (char *)curPtr - (char *)curWriteChunk->StartPtr ()); + + //if we are already at head of the list, try to grow the list + if (curWriteChunk == chunkListHead) + { + GrowChunkList (); + } + + curWriteChunk = curWriteChunk->prev; + if (curWriteChunk == chunkListTail) + { + writeHasWrapped = TRUE; + } + curPtr = (StressMsg*)((char*)curWriteChunk->EndPtr () - sizeof(StressMsg) - cArgs * sizeof(void*)); + return curPtr; +} + +#endif // DACCESS_COMPILE + +#endif // STRESS_LOG + +#ifndef __GCENV_BASE_INCLUDED__ +#if !defined(STRESS_LOG) || defined(DACCESS_COMPILE) +#define STRESS_LOG_VA(msg) do { } WHILE_0 +#define STRESS_LOG0(facility, level, msg) do { } WHILE_0 +#define STRESS_LOG1(facility, level, msg, data1) do { } WHILE_0 +#define STRESS_LOG2(facility, level, msg, data1, data2) do { } WHILE_0 +#define STRESS_LOG3(facility, level, msg, data1, data2, data3) do { } WHILE_0 +#define STRESS_LOG4(facility, level, msg, data1, data2, data3, data4) do { } WHILE_0 +#define STRESS_LOG5(facility, level, msg, data1, data2, data3, data4, data5) do { } WHILE_0 +#define STRESS_LOG6(facility, level, msg, data1, data2, data3, data4, data5, data6) do { } WHILE_0 +#define STRESS_LOG7(facility, level, msg, data1, data2, data3, data4, data5, data6, data7) do { } WHILE_0 +#define STRESS_LOG_PLUG_MOVE(plug_start, plug_end, plug_delta) do { } WHILE_0 +#define STRESS_LOG_ROOT_PROMOTE(root_addr, objPtr, methodTable) do { } WHILE_0 +#define STRESS_LOG_ROOT_RELOCATE(root_addr, old_value, new_value, methodTable) do { } WHILE_0 +#define STRESS_LOG_GC_START(gcCount, Gen, collectClasses) do { } WHILE_0 +#define STRESS_LOG_GC_END(gcCount, Gen, collectClasses) do { } WHILE_0 +#define STRESS_LOG_OOM_STACK(size) do { } WHILE_0 +#define STRESS_LOG_GC_STACK do { } WHILE_0 +#define STRESS_LOG_RESERVE_MEM(numChunks) do { } WHILE_0 +#endif // !STRESS_LOG || DACCESS_COMPILE +#endif // !__GCENV_BASE_INCLUDED__ + +#endif // StressLog_h diff --git a/src/coreclr/src/nativeaot/Runtime/inc/type_traits.hpp 
b/src/coreclr/src/nativeaot/Runtime/inc/type_traits.hpp new file mode 100644 index 0000000000000..45bdf8392abd3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/type_traits.hpp @@ -0,0 +1,311 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// +// type_traits.hpp +// +// Type trait metaprogramming utilities. +// + +#ifndef __TYPE_TRAITS_HPP__ +#define __TYPE_TRAITS_HPP__ + +#include "CommonTypes.h" + +namespace type_traits +{ + +namespace imp +{ + +struct true_type { static const bool value = true; }; +struct false_type { static const bool value = false; }; + +//////////////////////////////////////////////////////////////////////////////// +// Helper types Small and Big - guarantee that sizeof(Small) < sizeof(Big) +// + +template +struct conversion_helper +{ + typedef char Small; + struct Big { char dummy[2]; }; + static Big Test(...); + static Small Test(U); + static T MakeT(); +}; + +//////////////////////////////////////////////////////////////////////////////// +// class template conversion +// Figures out the conversion relationships between two types +// Invocations (T and U are types): +// a) conversion::exists +// returns (at compile time) true if there is an implicit conversion from T +// to U (example: Derived to Base) +// b) conversion::exists2Way +// returns (at compile time) true if there are both conversions from T +// to U and from U to T (example: int to char and back) +// c) conversion::sameType +// returns (at compile time) true if T and U represent the same type +// +// NOTE: might not work if T and U are in a private inheritance hierarchy. +// + +template +struct conversion +{ + typedef imp::conversion_helper H; + static const bool exists = sizeof(typename H::Small) == sizeof((H::Test(H::MakeT()))); + static const bool exists2Way = exists && conversion::exists; + static const bool sameType = false; +}; + +template +struct conversion +{ + static const bool exists = true; + static const bool exists2Way = true; + static const bool sameType = true; +}; + +template +struct conversion +{ + static const bool exists = false; + static const bool exists2Way = false; + static const bool sameType = false; +}; + +template +struct conversion +{ + static const bool exists = false; + static const bool exists2Way = false; + static const bool sameType = false; +}; + +template <> +struct conversion +{ + static const bool exists = true; + static const bool exists2Way = true; + static const bool sameType = true; +}; + +template +struct is_base_of_helper; + +template <> +struct is_base_of_helper : public true_type {} ; + +template <> +struct is_base_of_helper : public false_type {} ; + +}// imp + +//////////////////////////////////////////////////////////////////////////////// +// is_base_of::value is typedefed to be true if TDerived derives from TBase +// and false otherwise. +// +// +// NOTE: use TR1 type_traits::is_base_of when available. +// +#ifdef _MSC_VER + +template +struct is_base_of : public imp::is_base_of_helper<__is_base_of( TBase, TDerived)> {}; + +#else + +// Note that we need to compare pointer types here, since conversion of types by-value +// just tells us whether or not an implicit conversion constructor exists. We handle +// type parameters that are already pointers specially; see below. +template +struct is_base_of : public imp::is_base_of_helper::exists> {}; + +// Specialization to handle type parameters that are already pointers. 
+template +struct is_base_of : public imp::is_base_of_helper::exists> {}; + +// Specialization to handle invalid mixing of pointer types. +template +struct is_base_of : public imp::false_type {}; + +// Specialization to handle invalid mixing of pointer types. +template +struct is_base_of : public imp::false_type {}; + +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Remove const qualifications, if any. Access using remove_const::type +// +template struct remove_const { typedef T type; }; +template struct remove_const { typedef T type; }; + +//////////////////////////////////////////////////////////////////////////////// +// is_signed::value is true if T is a signed integral type, false otherwise. +// +template +struct is_signed { static const bool value = (static_cast(-1) < 0); }; + +} + +//////////////////////////////////////////////////////////////////////////////// +// These are related to type traits, but they are more like asserts of type +// traits in that the result is that either the compiler does or does not +// produce an error. +// +namespace type_constraints +{ + +//////////////////////////////////////////////////////////////////////////////// +// derived_from will produce a compiler error if TDerived does not +// derive from TBase. +// +// NOTE: use TR1 type_traits::is_base_of when available. +// + +template struct is_base_of +{ + is_base_of() + { + static_assert((type_traits::is_base_of::value), + "is_base_of() constraint violation: TDerived does not derive from TBase"); + } +}; + +}; // namespace type_constraints + +namespace rh { namespace std +{ + // Import some select components of the STL + + // TEMPLATE FUNCTION for_each + template + inline + _Fn1 for_each(_InIt _First, _InIt _Last, _Fn1 _Func) + { // perform function for each element + for (; _First != _Last; ++_First) + _Func(*_First); + return (_Func); + } + + template + inline + _InIt find(_InIt _First, _InIt _Last, const _Ty& _Val) + { // find first matching _Val + for (; _First != _Last; ++_First) + if (*_First == _Val) + break; + return (_First); + } + + template + inline + _InIt find_if(_InIt _First, _InIt _Last, _Pr _Pred) + { // find first satisfying _Pred + for (; _First != _Last; ++_First) + if (_Pred(*_First)) + break; + return (_First); + } + + template + inline + bool exists(_InIt _First, _InIt _Last, const _Ty& _Val) + { + return find(_First, _Last, _Val) != _Last; + } + + template + inline + bool exists_if(_InIt _First, _InIt _Last, _Pr _Pred) + { + return find_if(_First, _Last, _Pred) != _Last; + } + + template + inline + UIntNative count(_InIt _First, _InIt _Last, const _Ty& _Val) + { + UIntNative _Ret = 0; + for (; _First != _Last; _First++) + if (*_First == _Val) + ++_Ret; + return _Ret; + } + + template + inline + UIntNative count_if(_InIt _First, _InIt _Last, _Pr _Pred) + { + UIntNative _Ret = 0; + for (; _First != _Last; _First++) + if (_Pred(*_First)) + ++_Ret; + return _Ret; + } + + // Forward declaration, each collection requires specialization + template + inline + _FwdIt remove(_FwdIt _First, _FwdIt _Last, const _Ty& _Val); +} // namespace std +} // namespace rh + +#if 0 + +// ----------------------------------------------------------------- +// Holding place for unused-but-possibly-useful-in-the-future code. 
+ +// ------------------------------------------------- +// This belongs in type_traits.hpp + +// +// is_pointer::value is true if the type is a pointer, false otherwise +// +template struct is_pointer : public false_type {}; +template struct is_pointer : public true_type {}; + +// +// Remove pointer from type, if it has one. Use remove_pointer::type +// Further specialized in daccess.h +// +template struct remove_pointer { typedef T type; }; +template struct remove_pointer { typedef T type; }; + +// ------------------------------------------------- +// This belongs in daccess.h + +namespace type_traits +{ + +// +// is_pointer::value is true if the type is a pointer, false otherwise +// specialized from type_traits.hpp +// +template struct is_pointer > : public type_traits::true_type {}; + +// +// remove_pointer::type is T with one less pointer qualification, if it had one. +// specialized from type_traits.hpp +// +template struct remove_pointer > { typedef T type; }; + +} // type_traits + +namespace dac +{ + +// +// is_dptr::value is true if T is a __DPtr, false otherwise. +// This is a partial specialization case for the positive case. +// +//template struct is_dptr > : public type_traits::true_type {}; + +} + +#endif + +#endif + diff --git a/src/coreclr/src/nativeaot/Runtime/inc/varint.h b/src/coreclr/src/nativeaot/Runtime/inc/varint.h new file mode 100644 index 0000000000000..06e9d65d32db3 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/inc/varint.h @@ -0,0 +1,144 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +class VarInt +{ +public: + static UInt32 ReadUnsigned(PTR_UInt8 & pbEncoding) + { + UIntNative lengthBits = *pbEncoding & 0x0F; + size_t negLength = s_negLengthTab[lengthBits]; + UIntNative shift = s_shiftTab[lengthBits]; + UInt32 result = *(PTR_UInt32)(pbEncoding - negLength - 4); + + result >>= shift; + pbEncoding -= negLength; + + return result; + } + + // + // WARNING: This method returns the negative of the length of the value that it just skipped! + // + // This was helpful in the GC info scan loop because it allowed us to always skip past unsigned values in + // the body of the loop. At the end of loop, we use this negative sign to distinguish between two cases + // and that allows us to decode the unsigned value that we need outside of the loop. Note that we encode + // the negatives in the s_negLengthTable to avoid any additional operations in the body of the GC scan + // loop. 
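    // A concrete example may help here (derived from the code in this header, for
    // illustration only): WriteUnsigned below stores the byte length of an encoding in
    // the low bits of its first byte -- xxxxxxx0 for 1 byte, xxxxxx01 for 2, xxxxx011
    // for 3, xxxx0111 for 4, and 00001111 for 5. Encoding the value 300 therefore takes
    // the 2-byte branch and emits (UInt8)(300*4 + 1) == 0xB1 followed by
    // (UInt8)(300 >> 6) == 0x04. ReadUnsigned above reverses this without branching:
    // lengthBits == (0xB1 & 0x0F) == 1 selects negLength == -2 and shift == 18 from the
    // tables, the (possibly unaligned) 32-bit little-endian load ending at the last byte
    // of the encoding yields 0x04B1xxxx, 0x04B1xxxx >> 18 == 300, and subtracting the
    // negative length advances pbEncoding past the two bytes just consumed.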
+ // + static IntNative SkipUnsigned(PTR_UInt8 & pbEncoding) + { + UIntNative lengthBits = *pbEncoding & 0x0F; + size_t negLength = s_negLengthTab[lengthBits]; + pbEncoding -= negLength; + return negLength; + } + + static UIntNative WriteUnsigned(PTR_UInt8 pbDest, UInt32 value) + { + if (pbDest == NULL) + { + if (value < 128) + return 1; + + if (value < 128*128) + return 2; + + if (value < 128*128*128) + return 3; + + if (value < 128*128*128*128) + return 4; + + return 5; + } + + if (value < 128) + { + *pbDest++ = (UInt8)(value*2 + 0); + return 1; + } + + if (value < 128*128) + { + *pbDest++ = (UInt8)(value*4 + 1); + *pbDest++ = (UInt8)(value >> 6); + return 2; + } + + if (value < 128*128*128) + { + *pbDest++ = (UInt8)(value*8 + 3); + *pbDest++ = (UInt8)(value >> 5); + *pbDest++ = (UInt8)(value >> 13); + return 3; + } + + if (value < 128*128*128*128) + { + *pbDest++ = (UInt8)(value*16 + 7); + *pbDest++ = (UInt8)(value >> 4); + *pbDest++ = (UInt8)(value >> 12); + *pbDest++ = (UInt8)(value >> 20); + return 4; + } + + *pbDest++ = 15; + *pbDest++ = (UInt8)value; + *pbDest++ = (UInt8)(value >> 8); + *pbDest++ = (UInt8)(value >> 16); + *pbDest++ = (UInt8)(value >> 24); + return 5; + } + +private: + static Int8 s_negLengthTab[16]; + static UInt8 s_shiftTab[16]; +}; + +__declspec(selectany) +Int8 VarInt::s_negLengthTab[16] = +{ + -1, // 0 + -2, // 1 + -1, // 2 + -3, // 3 + + -1, // 4 + -2, // 5 + -1, // 6 + -4, // 7 + + -1, // 8 + -2, // 9 + -1, // 10 + -3, // 11 + + -1, // 12 + -2, // 13 + -1, // 14 + -5, // 15 +}; + +__declspec(selectany) +UInt8 VarInt::s_shiftTab[16] = +{ + 32-7*1, // 0 + 32-7*2, // 1 + 32-7*1, // 2 + 32-7*3, // 3 + + 32-7*1, // 4 + 32-7*2, // 5 + 32-7*1, // 6 + 32-7*4, // 7 + + 32-7*1, // 8 + 32-7*2, // 9 + 32-7*1, // 10 + 32-7*3, // 11 + + 32-7*1, // 12 + 32-7*2, // 13 + 32-7*1, // 14 + 0, // 15 +}; diff --git a/src/coreclr/src/nativeaot/Runtime/loglf.h b/src/coreclr/src/nativeaot/Runtime/loglf.h new file mode 100644 index 0000000000000..75c5d126a7873 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/loglf.h @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// The code in sos.DumpStressLog depends on the facility codes +// being bit flags sorted in increasing order. +// See code:EEStartup#TableOfContents for EE overview +DEFINE_LOG_FACILITY(LF_GC ,0x00000001) +DEFINE_LOG_FACILITY(LF_GCINFO ,0x00000002) +DEFINE_LOG_FACILITY(LF_GCALLOC ,0x00000004) +DEFINE_LOG_FACILITY(LF_GCROOTS ,0x00000008) +DEFINE_LOG_FACILITY(LF_STARTUP ,0x00000010) // Log startup and shutdown failures +DEFINE_LOG_FACILITY(LF_STACKWALK ,0x00000020) +// LF_ALWAYS 0x80000000 // make certain you don't try to use this bit for a real facility +// LF_ALL 0xFFFFFFFF +// +#undef DEFINE_LOG_FACILITY + diff --git a/src/coreclr/src/nativeaot/Runtime/portable.cpp b/src/coreclr/src/nativeaot/Runtime/portable.cpp new file mode 100644 index 0000000000000..4372925d0ddf4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/portable.cpp @@ -0,0 +1,440 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
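The helpers in this file provide portable C++ implementations of runtime entry points that are otherwise written in assembly. The non-finalizable allocation helpers that follow (RhpNewFast, RhpNewArray) share one shape: try a thread-local bump-pointer allocation first, and fall back to the GC only when the per-thread budget is exhausted. The sketch below is a simplified model of that fast path; SketchAllocContext and sketch_try_alloc are hypothetical names used for illustration and are not part of this change.

#include <cstddef>
#include <cstdint>

// Hypothetical, simplified stand-in for the per-thread allocation context
// (the real layout is gc_alloc_context, declared later in this file).
struct SketchAllocContext
{
    uint8_t* alloc_ptr;    // next free byte in the thread's current allocation budget
    uint8_t* alloc_limit;  // end of the budget handed out by the GC
};

// Bump-pointer fast path: advance the pointer if the object fits, otherwise report
// failure so the caller can take the slow path (RhpGcAlloc in the real helpers).
inline void* sketch_try_alloc(SketchAllocContext* ctx, size_t size)
{
    uint8_t* result = ctx->alloc_ptr;
    uint8_t* advance = result + size;
    if (advance <= ctx->alloc_limit)
    {
        ctx->alloc_ptr = advance;  // no locks and no GC interaction on this path
        return result;             // the caller still installs the object's EEType
    }
    return nullptr;                // budget exhausted: hand the request to the GC
}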
+#include "common.h" + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "CommonMacros.inl" +#include "volatile.h" +#include "PalRedhawk.h" +#include "rhassert.h" + +#include "slist.h" +#include "gcrhinterface.h" +#include "shash.h" +#include "RWLock.h" +#include "varint.h" +#include "holder.h" +#include "rhbinder.h" +#include "Crst.h" +#include "RuntimeInstance.h" +#include "event.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "threadstore.h" +#include "threadstore.inl" + +#include "eetype.h" +#include "TypeManager.h" +#include "eetype.inl" +#include "ObjectLayout.h" + +#include "GCMemoryHelpers.h" +#include "GCMemoryHelpers.inl" + +#if defined(USE_PORTABLE_HELPERS) + +EXTERN_C REDHAWK_API void* REDHAWK_CALLCONV RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame); +EXTERN_C REDHAWK_API void* REDHAWK_CALLCONV RhpPublishObject(void* pObject, UIntNative cbSize); + +struct gc_alloc_context +{ + UInt8* alloc_ptr; + UInt8* alloc_limit; + __int64 alloc_bytes; //Number of bytes allocated on SOH by this context + __int64 alloc_bytes_loh; //Number of bytes allocated on LOH by this context + void* gc_reserved_1; + void* gc_reserved_2; + int alloc_count; +}; + +// +// Allocations +// +COOP_PINVOKE_HELPER(Object *, RhpNewFast, (EEType* pEEType)) +{ + ASSERT(!pEEType->RequiresAlign8()); + ASSERT(!pEEType->HasFinalizer()); + + Thread * pCurThread = ThreadStore::GetCurrentThread(); + gc_alloc_context * acontext = pCurThread->GetAllocContext(); + Object * pObject; + + size_t size = pEEType->get_BaseSize(); + + UInt8* result = acontext->alloc_ptr; + UInt8* advance = result + size; + if (advance <= acontext->alloc_limit) + { + acontext->alloc_ptr = advance; + pObject = (Object *)result; + pObject->set_EEType(pEEType); + return pObject; + } + + pObject = (Object *)RhpGcAlloc(pEEType, 0, size, NULL); + if (pObject == nullptr) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw OOM + } + pObject->set_EEType(pEEType); + + if (size >= RH_LARGE_OBJECT_SIZE) + RhpPublishObject(pObject, size); + + return pObject; +} + +#define GC_ALLOC_FINALIZE 0x1 // TODO: Defined in gc.h + +COOP_PINVOKE_HELPER(Object *, RhpNewFinalizable, (EEType* pEEType)) +{ + ASSERT(!pEEType->RequiresAlign8()); + ASSERT(pEEType->HasFinalizer()); + + size_t size = pEEType->get_BaseSize(); + + Object * pObject = (Object *)RhpGcAlloc(pEEType, GC_ALLOC_FINALIZE, size, NULL); + if (pObject == nullptr) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw OOM + } + pObject->set_EEType(pEEType); + + if (size >= RH_LARGE_OBJECT_SIZE) + RhpPublishObject(pObject, size); + + return pObject; +} + +COOP_PINVOKE_HELPER(Array *, RhpNewArray, (EEType * pArrayEEType, int numElements)) +{ + ASSERT_MSG(!pArrayEEType->RequiresAlign8(), "NYI"); + + Thread * pCurThread = ThreadStore::GetCurrentThread(); + gc_alloc_context * acontext = pCurThread->GetAllocContext(); + Array * pObject; + + if (numElements < 0) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw overflow + } + + size_t size; +#ifndef HOST_64BIT + // if the element count is <= 0x10000, no overflow is possible because the component size is + // <= 0xffff, and thus the product is <= 0xffff0000, and the base size is only ~12 bytes + if (numElements > 0x10000) + { + // Perform the size computation using 64-bit integeres to detect overflow + uint64_t size64 = (uint64_t)pArrayEEType->get_BaseSize() + ((uint64_t)numElements * 
(uint64_t)pArrayEEType->get_ComponentSize()); + size64 = (size64 + (sizeof(UIntNative)-1)) & ~(sizeof(UIntNative)-1); + + size = (size_t)size64; + if (size != size64) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw overflow + } + } + else +#endif // !HOST_64BIT + { + size = (size_t)pArrayEEType->get_BaseSize() + ((size_t)numElements * (size_t)pArrayEEType->get_ComponentSize()); + size = ALIGN_UP(size, sizeof(UIntNative)); + } + + UInt8* result = acontext->alloc_ptr; + UInt8* advance = result + size; + if (advance <= acontext->alloc_limit) + { + acontext->alloc_ptr = advance; + pObject = (Array *)result; + pObject->set_EEType(pArrayEEType); + pObject->InitArrayLength((UInt32)numElements); + return pObject; + } + + pObject = (Array *)RhpGcAlloc(pArrayEEType, 0, size, NULL); + if (pObject == nullptr) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw OOM + } + pObject->set_EEType(pArrayEEType); + pObject->InitArrayLength((UInt32)numElements); + + if (size >= RH_LARGE_OBJECT_SIZE) + RhpPublishObject(pObject, size); + + return pObject; +} + +COOP_PINVOKE_HELPER(String *, RhNewString, (EEType * pArrayEEType, int numElements)) +{ + // TODO: Implement. We tail call to RhpNewArray for now since there's a bunch of TODOs in the places + // that matter anyway. + return (String*)RhpNewArray(pArrayEEType, numElements); +} + +#endif +#if defined(USE_PORTABLE_HELPERS) + +#ifdef HOST_ARM +COOP_PINVOKE_HELPER(Object *, RhpNewFinalizableAlign8, (EEType* pEEType)) +{ + Object * pObject = nullptr; + /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + return pObject; +} + +COOP_PINVOKE_HELPER(Object *, RhpNewFastMisalign, (EEType* pEEType)) +{ + Object * pObject = nullptr; + /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + return pObject; +} + +COOP_PINVOKE_HELPER(Object *, RhpNewFastAlign8, (EEType* pEEType)) +{ + Object * pObject = nullptr; + /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + return pObject; +} + +COOP_PINVOKE_HELPER(Array *, RhpNewArrayAlign8, (EEType * pArrayEEType, int numElements)) +{ + Array * pObject = nullptr; + /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + return pObject; +} +#endif + +COOP_PINVOKE_HELPER(void, RhpInitialDynamicInterfaceDispatch, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch1, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch2, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch4, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch8, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch16, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch32, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpInterfaceDispatch64, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void, RhpVTableOffsetDispatch, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +// @TODO Implement UniversalTransition +EXTERN_C void * ReturnFromUniversalTransition; +void * ReturnFromUniversalTransition; + +// @TODO Implement UniversalTransition_DebugStepTailCall +EXTERN_C void * ReturnFromUniversalTransition_DebugStepTailCall; +void * ReturnFromUniversalTransition_DebugStepTailCall; + +#endif // USE_PORTABLE_HELPERS + +// @TODO Implement CallDescrThunk +EXTERN_C void * ReturnFromCallDescrThunk; +#ifdef USE_PORTABLE_HELPERS +void * ReturnFromCallDescrThunk; +#endif + +#if defined(USE_PORTABLE_HELPERS) || defined(TARGET_UNIX) +#if !defined 
(HOST_ARM64) +// +// Return address hijacking +// +COOP_PINVOKE_HELPER(void, RhpGcProbeHijackScalar, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +COOP_PINVOKE_HELPER(void, RhpGcProbeHijackObject, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +COOP_PINVOKE_HELPER(void, RhpGcProbeHijackByref, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +COOP_PINVOKE_HELPER(void, RhpGcStressHijackScalar, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +COOP_PINVOKE_HELPER(void, RhpGcStressHijackObject, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +COOP_PINVOKE_HELPER(void, RhpGcStressHijackByref, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} +#endif +#endif // defined(USE_PORTABLE_HELPERS) || defined(TARGET_UNIX) + +#if defined(USE_PORTABLE_HELPERS) + +#if !defined (HOST_ARM64) +COOP_PINVOKE_HELPER(void, RhpAssignRef, (Object ** dst, Object * ref)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + *dst = ref; + InlineWriteBarrier(dst, ref); +} + +COOP_PINVOKE_HELPER(void, RhpCheckedAssignRef, (Object ** dst, Object * ref)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + *dst = ref; + InlineCheckedWriteBarrier(dst, ref); +} +#endif + +COOP_PINVOKE_HELPER(Object *, RhpCheckedLockCmpXchg, (Object ** location, Object * value, Object * comparand)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + Object * ret = (Object *)PalInterlockedCompareExchangePointer((void * volatile *)location, value, comparand); + InlineCheckedWriteBarrier(location, value); + return ret; +} + +COOP_PINVOKE_HELPER(Object *, RhpCheckedXchg, (Object ** location, Object * value)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + Object * ret = (Object *)PalInterlockedExchangePointer((void * volatile *)location, value); + InlineCheckedWriteBarrier(location, value); + return ret; +} + +COOP_PINVOKE_HELPER(Int32, RhpLockCmpXchg32, (Int32 * location, Int32 value, Int32 comparand)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + return PalInterlockedCompareExchange(location, value, comparand); +} + +COOP_PINVOKE_HELPER(Int64, RhpLockCmpXchg64, (Int64 * location, Int64 value, Int64 comparand)) +{ + // @TODO: USE_PORTABLE_HELPERS - Null check + return PalInterlockedCompareExchange64(location, value, comparand); +} + +#endif // USE_PORTABLE_HELPERS + +#if !defined(HOST_ARM64) +COOP_PINVOKE_HELPER(void, RhpMemoryBarrier, ()) +{ + PalMemoryBarrier(); +} +#endif + +#if defined(USE_PORTABLE_HELPERS) +EXTERN_C REDHAWK_API void* __cdecl RhAllocateThunksMapping() +{ + return NULL; +} + +COOP_PINVOKE_HELPER(void *, RhpGetThunksBase, ()) +{ + return NULL; +} + +COOP_PINVOKE_HELPER(int, RhpGetNumThunkBlocksPerMapping, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return 0; +} + +COOP_PINVOKE_HELPER(int, RhpGetNumThunksPerBlock, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return 0; +} + +COOP_PINVOKE_HELPER(int, RhpGetThunkSize, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return 0; +} + +COOP_PINVOKE_HELPER(void*, RhpGetThunkDataBlockAddress, (void* pThunkStubAddress)) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return NULL; +} + +COOP_PINVOKE_HELPER(void*, RhpGetThunkStubsBlockAddress, (void* pThunkDataAddress)) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return NULL; +} + +COOP_PINVOKE_HELPER(int, RhpGetThunkBlockSize, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return NULL; +} + +COOP_PINVOKE_HELPER(void, RhCallDescrWorker, (void * callDescr)) +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +#ifdef CALLDESCR_FPARGREGSARERETURNREGS +COOP_PINVOKE_HELPER(void, CallingConventionConverter_GetStubs, (UIntNative* pReturnVoidStub, UIntNative* pReturnIntegerStub, UIntNative* pCommonStub)) +#else 
+COOP_PINVOKE_HELPER(void, CallingConventionConverter_GetStubs, (UIntNative* pReturnVoidStub, UIntNative* pReturnIntegerStub, UIntNative* pCommonStub, UIntNative* pReturnFloatingPointReturn4Thunk, UIntNative* pReturnFloatingPointReturn8Thunk)) +#endif +{ + ASSERT_UNCONDITIONALLY("NYI"); +} + +COOP_PINVOKE_HELPER(void *, RhGetCommonStubAddress, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return NULL; +} + +COOP_PINVOKE_HELPER(void *, RhGetCurrentThunkContext, ()) +{ + ASSERT_UNCONDITIONALLY("NYI"); + return NULL; +} + +#endif + +COOP_PINVOKE_HELPER(void, RhpGcPoll, ()) +{ + // TODO: implement +} diff --git a/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.cpp b/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.cpp new file mode 100644 index 0000000000000..fc1288fe181c2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.cpp @@ -0,0 +1,210 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// On desktop CLR, GC ETW event firing borrows heavily from code in the profiling API, +// as the GC already called hooks in the profapi to notify it of roots & references. +// This file shims up that profapi code the GC expects, though only for the purpose of +// firing ETW events (not for getting a full profapi up on redhawk). +// + +#include "common.h" + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +#include "gcenv.h" +#include "gcheaputilities.h" +#include "eventtrace.h" +#include "profheapwalkhelper.h" + +//--------------------------------------------------------------------------------------- +// +// Callback of type promote_func called by GC while scanning roots (in GCProfileWalkHeap, +// called after the collection). Wrapper around EEToProfInterfaceImpl::RootReference2, +// which does the real work. +// +// Arguments: +// pObj - Object reference encountered +/// ppRoot - Address that references ppObject (can be interior pointer) +// pSC - ProfilingScanContext * containing the root kind and GCReferencesData used +// by RootReference2 +// dwFlags - Properties of the root as GC_CALL* constants (this function converts +// to COR_PRF_GC_ROOT_FLAGS. +// + +void ScanRootsHelper(Object* pObj, Object** ppRoot, ScanContext * pSC, DWORD dwFlags) +{ + ProfilingScanContext *pPSC = (ProfilingScanContext *)pSC; + + DWORD dwEtwRootFlags = 0; + if (dwFlags & GC_CALL_INTERIOR) + dwEtwRootFlags |= kEtwGCRootFlagsInterior; + if (dwFlags & GC_CALL_PINNED) + dwEtwRootFlags |= kEtwGCRootFlagsPinning; + + // Notify ETW of the root + + if (ETW::GCLog::ShouldWalkHeapRootsForEtw()) + { + ETW::GCLog::RootReference( + ppRoot, // root address + pObj, // object being rooted + NULL, // pSecondaryNodeForDependentHandle is NULL, cuz this isn't a dependent handle + FALSE, // is dependent handle + pPSC, + dwFlags, // dwGCFlags + dwEtwRootFlags); + } +} + +//--------------------------------------------------------------------------------------- +// +// Callback of type walk_fn used by GCHeap::WalkObject. Keeps a count of each +// object reference found. 
+// +// Arguments: +// pBO - Object reference encountered in walk +// context - running count of object references encountered +// +// Return Value: +// Always returns TRUE to object walker so it walks the entire object +// + +bool CountContainedObjectRef(Object * pBO, void * context) +{ + LIMITED_METHOD_CONTRACT; + UNREFERENCED_PARAMETER(pBO); + // Increase the count + (*((size_t *)context))++; + + return TRUE; +} + +//--------------------------------------------------------------------------------------- +// +// Callback of type walk_fn used by GCHeap::WalkObject. Stores each object reference +// encountered into an array. +// +// Arguments: +// pBO - Object reference encountered in walk +// context - Array of locations within the walked object that point to other +// objects. On entry, (*context) points to the next unfilled array +// entry. On exit, that location is filled, and (*context) is incremented +// to point to the next entry. +// +// Return Value: +// Always returns TRUE to object walker so it walks the entire object +// + +bool SaveContainedObjectRef(Object * pBO, void * context) +{ + LIMITED_METHOD_CONTRACT; + // Assign the value + **((Object ***)context) = pBO; + + // Now increment the array pointer + // + // Note that HeapWalkHelper has already walked the references once to count them up, + // and then allocated an array big enough to hold those references. First time this + // callback is called for a given object, (*context) points to the first entry in the + // array. So "blindly" incrementing (*context) here and using it next time around + // for the next reference, over and over again, should be safe. + (*((Object ***)context))++; + + return TRUE; +} + +//--------------------------------------------------------------------------------------- +// +// Callback of type walk_fn used by the GC when walking the heap, to help profapi +// track objects. This guy orchestrates the use of the above callbacks which dig +// into object references contained each object encountered by this callback. +// +// Arguments: +// pBO - Object reference encountered on the heap +// +// Return Value: +// BOOL indicating whether the heap walk should continue. +// TRUE=continue +// FALSE=stop +// + +bool HeapWalkHelper(Object * pBO, void * pvContext) +{ + OBJECTREF * arrObjRef = NULL; + size_t cNumRefs = 0; + bool bOnStack = false; + //MethodTable * pMT = pBO->GetMethodTable(); + + ProfilerWalkHeapContext * pProfilerWalkHeapContext = (ProfilerWalkHeapContext *) pvContext; + + //if (pMT->ContainsPointersOrCollectible()) + { + // First round through calculates the number of object refs for this class + GCHeapUtilities::GetGCHeap()->DiagWalkObject(pBO, &CountContainedObjectRef, (void *)&cNumRefs); + + if (cNumRefs > 0) + { + // Create an array to contain all of the refs for this object + bOnStack = cNumRefs <= 32 ? 
true : false; + + if (bOnStack) + { + // It's small enough, so just allocate on the stack + arrObjRef = (OBJECTREF *)_alloca(cNumRefs * sizeof(OBJECTREF)); + } + else + { + // Otherwise, allocate from the heap + arrObjRef = new (nothrow) OBJECTREF[cNumRefs]; + + if (!arrObjRef) + { + return FALSE; + } + } + + // Second round saves off all of the ref values + OBJECTREF * pCurObjRef = arrObjRef; + GCHeapUtilities::GetGCHeap()->DiagWalkObject(pBO, &SaveContainedObjectRef, (void *)&pCurObjRef); + } + } + + HRESULT hr = E_FAIL; + +#ifdef FEATURE_ETW + if (ETW::GCLog::ShouldWalkHeapObjectsForEtw()) + { + ETW::GCLog::ObjectReference( + pProfilerWalkHeapContext, + pBO, + ULONGLONG(pBO->get_SafeEEType()), + cNumRefs, + (Object **) arrObjRef); + } +#endif // FEATURE_ETW + + // If the data was not allocated on the stack, need to clean it up. + if ((arrObjRef != NULL) && !bOnStack) + { + delete [] arrObjRef; + } + + // Return TRUE iff we want to the heap walk to continue. The only way we'd abort the + // heap walk is if we're issuing profapi callbacks, and the profapi profiler + // intentionally returned a failed HR (as its request that we stop the walk). There's + // a potential conflict here. If a profapi profiler and an ETW profiler are both + // monitoring the heap dump, and the profapi profiler requests to abort the walk (but + // the ETW profiler may not want to abort the walk), then what do we do? The profapi + // profiler gets precedence. We don't want to accidentally send more callbacks to a + // profapi profiler that explicitly requested an abort. The ETW profiler will just + // have to deal. In theory, I could make the code more complex by remembering that a + // profapi profiler requested to abort the dump but an ETW profiler is still + // attached, and then intentionally inhibit the remainder of the profapi callbacks + // for this GC. But that's unnecessary complexity. In practice, it should be + // extremely rare that a profapi profiler is monitoring heap dumps AND an ETW + // profiler is also monitoring heap dumps. + return TRUE; +} + +#endif // defined(FEATURE_EVENT_TRACE) || defined(GC_PROFILING) diff --git a/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.h b/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.h new file mode 100644 index 0000000000000..9c0da119aeabb --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/profheapwalkhelper.h @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _GCHEAPWALKHELPER_H_ +#define _GCHEAPWALKHELPER_H_ + + +// These two functions are utilized to scan the heap if requested by ETW +// or a profiler. The implementations of these two functions are in profheapwalkhelper.cpp. +#if defined(FEATURE_EVENT_TRACE) || defined(GC_PROFILING) +void ScanRootsHelper(Object* pObj, Object** ppRoot, ScanContext* pSC, DWORD dwFlags); +bool HeapWalkHelper(Object* pBO, void* pvContext); +#endif + + +#endif // _GCHEAPWALKHELPER_H_ diff --git a/src/coreclr/src/nativeaot/Runtime/regdisplay.h b/src/coreclr/src/nativeaot/Runtime/regdisplay.h new file mode 100644 index 0000000000000..b9ef9fa4bfcac --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/regdisplay.h @@ -0,0 +1,162 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
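REGDISPLAY, defined below per target architecture, is the register state that the stack-frame iterator carries from frame to frame: the pXxx members hold the addresses of the slots where callee-saved register values were spilled, so that consumers can read -- and, where needed, update -- the saved values in place, while SP, IP and pIP describe the frame currently being examined. The sketch below shows one plausible way a walker might publish the caller's frame into a REGDISPLAY; SketchAdvanceToCaller is a hypothetical helper, not part of this change.

// Hypothetical illustration only; the types used are the ones defined below.
inline void SketchAdvanceToCaller(REGDISPLAY * pRegisterSet,
                                  PTR_PCODE pCallerIPSlot, // where the caller's return address was saved
                                  UIntNative callerSP)     // caller's stack pointer after the call returns
{
    pRegisterSet->SetAddrOfIP(pCallerIPSlot); // keep the slot address so the value can be updated in place
    pRegisterSet->SetIP(*pCallerIPSlot);      // cache the caller's code address for the next iteration
    pRegisterSet->SetSP(callerSP);
}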
+ +#if defined(TARGET_X86) || defined(TARGET_AMD64) + +struct REGDISPLAY +{ + PTR_UIntNative pRax; + PTR_UIntNative pRcx; + PTR_UIntNative pRdx; + PTR_UIntNative pRbx; + // pEsp; + PTR_UIntNative pRbp; + PTR_UIntNative pRsi; + PTR_UIntNative pRdi; +#ifdef TARGET_AMD64 + PTR_UIntNative pR8; + PTR_UIntNative pR9; + PTR_UIntNative pR10; + PTR_UIntNative pR11; + PTR_UIntNative pR12; + PTR_UIntNative pR13; + PTR_UIntNative pR14; + PTR_UIntNative pR15; +#endif // TARGET_AMD64 + + UIntNative SP; + PTR_PCODE pIP; + PCODE IP; + +#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) + Fp128 Xmm[16-6]; // preserved xmm6..xmm15 regs for EH stackwalk + // these need to be unwound during a stack walk + // for EH, but not adjusted, so we only need + // their values, not their addresses +#endif // TARGET_AMD64 && !UNIX_AMD64_ABI + + inline PCODE GetIP() { return IP; } + inline PTR_PCODE GetAddrOfIP() { return pIP; } + inline UIntNative GetSP() { return SP; } + inline UIntNative GetFP() { return *pRbp; } + inline UIntNative GetPP() { return *pRbx; } + + inline void SetIP(PCODE IP) { this->IP = IP; } + inline void SetAddrOfIP(PTR_PCODE pIP) { this->pIP = pIP; } + inline void SetSP(UIntNative SP) { this->SP = SP; } +}; + +#elif defined(TARGET_ARM) + +struct REGDISPLAY +{ + PTR_UIntNative pR0; + PTR_UIntNative pR1; + PTR_UIntNative pR2; + PTR_UIntNative pR3; + PTR_UIntNative pR4; + PTR_UIntNative pR5; + PTR_UIntNative pR6; + PTR_UIntNative pR7; + PTR_UIntNative pR8; + PTR_UIntNative pR9; + PTR_UIntNative pR10; + PTR_UIntNative pR11; + PTR_UIntNative pR12; + PTR_UIntNative pLR; + + UIntNative SP; + PTR_PCODE pIP; + PCODE IP; + + UInt64 D[16-8]; // preserved D registers D8..D15 (note that D16-D31 are not preserved according to the ABI spec) + // these need to be unwound during a stack walk + // for EH, but not adjusted, so we only need + // their values, not their addresses + + inline PCODE GetIP() { return IP; } + inline PTR_PCODE GetAddrOfIP() { return pIP; } + inline UIntNative GetSP() { return SP; } + inline UIntNative GetFP() { return *pR11; } + inline void SetIP(PCODE IP) { this->IP = IP; } + inline void SetAddrOfIP(PTR_PCODE pIP) { this->pIP = pIP; } + inline void SetSP(UIntNative SP) { this->SP = SP; } +}; + +#elif defined(TARGET_ARM64) + +struct REGDISPLAY +{ + PTR_UIntNative pX0; + PTR_UIntNative pX1; + PTR_UIntNative pX2; + PTR_UIntNative pX3; + PTR_UIntNative pX4; + PTR_UIntNative pX5; + PTR_UIntNative pX6; + PTR_UIntNative pX7; + PTR_UIntNative pX8; + PTR_UIntNative pX9; + PTR_UIntNative pX10; + PTR_UIntNative pX11; + PTR_UIntNative pX12; + PTR_UIntNative pX13; + PTR_UIntNative pX14; + PTR_UIntNative pX15; + PTR_UIntNative pX16; + PTR_UIntNative pX17; + PTR_UIntNative pX18; + PTR_UIntNative pX19; + PTR_UIntNative pX20; + PTR_UIntNative pX21; + PTR_UIntNative pX22; + PTR_UIntNative pX23; + PTR_UIntNative pX24; + PTR_UIntNative pX25; + PTR_UIntNative pX26; + PTR_UIntNative pX27; + PTR_UIntNative pX28; + PTR_UIntNative pFP; // X29 + PTR_UIntNative pLR; // X30 + + UIntNative SP; + PTR_PCODE pIP; + PCODE IP; + + UInt64 D[16-8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved + // (V0-V7 and V16-V31 are not preserved according to the ABI spec). 
+ // These need to be unwound during a stack walk + // for EH, but not adjusted, so we only need + // their values, not their addresses + + inline PCODE GetIP() { return IP; } + inline PTR_PCODE GetAddrOfIP() { return pIP; } + inline UIntNative GetSP() { return SP; } + inline UIntNative GetFP() { return *pFP; } + + inline void SetIP(PCODE IP) { this->IP = IP; } + inline void SetAddrOfIP(PTR_PCODE pIP) { this->pIP = pIP; } + inline void SetSP(UIntNative SP) { this->SP = SP; } +}; +#elif defined(TARGET_WASM) + +struct REGDISPLAY +{ + // TODO: WebAssembly doesn't really have registers. What exactly do we need here? + + UIntNative SP; + PTR_PCODE pIP; + PCODE IP; + + inline PCODE GetIP() { return NULL; } + inline PTR_PCODE GetAddrOfIP() { return NULL; } + inline UIntNative GetSP() { return 0; } + inline UIntNative GetFP() { return 0; } + + inline void SetIP(PCODE IP) { } + inline void SetAddrOfIP(PTR_PCODE pIP) { } + inline void SetSP(UIntNative SP) { } +}; +#endif // HOST_X86 || HOST_AMD64 || HOST_ARM || HOST_ARM64 || HOST_WASM + +typedef REGDISPLAY * PREGDISPLAY; diff --git a/src/coreclr/src/nativeaot/Runtime/rhassert.cpp b/src/coreclr/src/nativeaot/Runtime/rhassert.cpp new file mode 100644 index 0000000000000..7ac5c540fd16a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/rhassert.cpp @@ -0,0 +1,110 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" + + +#include "RhConfig.h" + +#ifdef _DEBUG + +#define MB_ABORTRETRYIGNORE 0x00000002L +#define IDABORT 3 +#define IDRETRY 4 +#define IDIGNORE 5 + +void Assert(const char * expr, const char * file, UInt32 line_num, const char * message) +{ +#ifndef DACCESS_COMPILE +#ifdef NO_UI_ASSERT + PalDebugBreak(); +#else + if (g_pRhConfig->GetBreakOnAssert()) + { + printf( + "--------------------------------------------------\n" + "Debug Assertion Violation\n\n" + "%s%s%s" + "Expression: '%s'\n\n" + "File: %s, Line: %u\n" + "--------------------------------------------------\n", + message ? ("Message: ") : (""), + message ? (message) : (""), + message ? ("\n\n") : (""), + expr, file, line_num); + + // Flush standard output before failing fast to make sure the assertion failure message + // is retained when tests are being run with redirected stdout. + fflush(stdout); + + // If there's no debugger attached, we just FailFast + if (!PalIsDebuggerPresent()) + PalRaiseFailFastException(NULL, NULL, FAIL_FAST_GENERATE_EXCEPTION_ADDRESS); + + // If there is a debugger attached, we break and then allow continuation. + PalDebugBreak(); + return; + } + + char buffer[4096]; + + sprintf_s(buffer, COUNTOF(buffer), + "--------------------------------------------------\n" + "Debug Assertion Violation\n\n" + "%s%s%s" + "Expression: '%s'\n\n" + "File: %s, Line: %u\n" + "--------------------------------------------------\n" + "Abort: Exit Immediately\n" + "Retry: DebugBreak()\n" + "Ignore: Keep Going\n" + "--------------------------------------------------\n", + message ? ("Message: ") : (""), + message ? (message) : (""), + message ? 
("\n\n") : (""), + expr, file, line_num); + + HANDLE hMod = PalLoadLibraryExW(L"user32.dll", NULL, 0); + Int32 (* pfn)(HANDLE, char *, const char *, UInt32) = + (Int32 (*)(HANDLE, char *, const char *, UInt32))PalGetProcAddress(hMod, "MessageBoxA"); + + Int32 result = pfn(NULL, buffer, "Redhawk Assert", MB_ABORTRETRYIGNORE); + + switch (result) + { + case IDABORT: + PalTerminateProcess(PalGetCurrentProcess(), 666); + break; + case IDRETRY: + PalDebugBreak(); + break; + case IDIGNORE: + break; + } +#endif +#else + UNREFERENCED_PARAMETER(expr); + UNREFERENCED_PARAMETER(file); + UNREFERENCED_PARAMETER(line_num); + UNREFERENCED_PARAMETER(message); +#endif //!DACCESS_COMPILE +} + +extern "C" void NYI_Assert(const char *message, ...) +{ +#if !defined(DACCESS_COMPILE) + va_list args; + va_start(args, message); + vprintf(message, args); + va_end(args); + ASSERT_UNCONDITIONALLY("NYI"); +#else + UNREFERENCED_PARAMETER(message); +#endif +} + +#endif // _DEBUG diff --git a/src/coreclr/src/nativeaot/Runtime/rhassert.h b/src/coreclr/src/nativeaot/Runtime/rhassert.h new file mode 100644 index 0000000000000..5da270dacb9fd --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/rhassert.h @@ -0,0 +1,69 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#ifndef __RHASSERT_H__ +#define __RHASSERT_H__ + +#ifdef _MSC_VER +#define ASSUME(expr) __assume(expr) +#else // _MSC_VER +#define ASSUME(expr) do { if (!(expr)) __builtin_unreachable(); } while (0) +#endif // _MSC_VER + +#if defined(_DEBUG) && !defined(DACCESS_COMPILE) + +#define ASSERT(expr) \ + { \ + if (!(expr)) { Assert(#expr, __FILE__, __LINE__, NULL); } \ + } \ + +#define ASSERT_MSG(expr, msg) \ + { \ + if (!(expr)) { Assert(#expr, __FILE__, __LINE__, msg); } \ + } \ + +#define VERIFY(expr) ASSERT((expr)) + +#define ASSERT_UNCONDITIONALLY(message) \ + Assert("ASSERT_UNCONDITIONALLY", __FILE__, __LINE__, message); \ + +void Assert(const char * expr, const char * file, unsigned int line_num, const char * message); + +#else + +#define ASSERT(expr) + +#define ASSERT_MSG(expr, msg) + +#define VERIFY(expr) (expr) + +#define ASSERT_UNCONDITIONALLY(message) + +#endif + +#ifndef _ASSERTE +#define _ASSERTE(_expr) ASSERT(_expr) +#endif + +#if defined(_DEBUG) + +void NYI_ASSERT(); + +#endif + +#define PORTABILITY_ASSERT(message) \ + ASSERT_UNCONDITIONALLY(message); \ + ASSUME(0); \ + +#define UNREACHABLE() \ + ASSERT_UNCONDITIONALLY("UNREACHABLE"); \ + ASSUME(0); \ + +#define UNREACHABLE_MSG(message) \ + ASSERT_UNCONDITIONALLY(message); \ + ASSUME(0); \ + +#define FAIL_FAST_GENERATE_EXCEPTION_ADDRESS 0x1 + +#define RhFailFast() PalRaiseFailFastException(NULL, NULL, FAIL_FAST_GENERATE_EXCEPTION_ADDRESS) + +#endif // __RHASSERT_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/rhcommon.h b/src/coreclr/src/nativeaot/Runtime/rhcommon.h new file mode 100644 index 0000000000000..5cd0b1a6ca32a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/rhcommon.h @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is here because we share some common code with the CLR and that platform uses common.h as a +// precompiled header. 
Due to limitations on precompilation (a precompiled header must be included first +// and must not be preceded by any other preprocessor directive) we cannot conditionally include common.h, +// so the simplest solution is to maintain this empty header under Redhawk. +// + +// +// For our DAC build, we precompile gcrhenv.h because it is extremely large (~3MB of text). For non-DAC +// builds, we do not do this because the majority of the files have more constrained #includes. +// + +#include "stdint.h" diff --git a/src/coreclr/src/nativeaot/Runtime/rheventtrace.cpp b/src/coreclr/src/nativeaot/Runtime/rheventtrace.cpp new file mode 100644 index 0000000000000..93c9a9a71d670 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/rheventtrace.cpp @@ -0,0 +1,623 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Redhawk-specific ETW helper code. +// +// When Redhawk does stuff substantially different from desktop CLR, the +// Redhawk-specific implementations should go here. +// +#include "common.h" +#include "gcenv.h" +#include "rheventtrace.h" +#include "eventtrace.h" +#include "rhbinder.h" +#include "slist.h" +#include "rwlock.h" +#include "runtimeinstance.h" +#include "shash.h" +#include "eventtracepriv.h" +#include "shash.inl" +#include "palredhawk.h" + +#if defined(FEATURE_EVENT_TRACE) + +//--------------------------------------------------------------------------------------- +// BulkTypeEventLogger is a helper class to batch up type information and then flush to +// ETW once the event reaches its max # descriptors + + +//--------------------------------------------------------------------------------------- +// +// Batches up ETW information for a type and pops out to recursively call +// ETW::TypeSystemLog::LogTypeAndParametersIfNecessary for any +// "type parameters". Generics info is not reliably available, so "type parameter" +// really just refers to the type of array elements if thAsAddr is an array. +// +// Arguments: +// * thAsAddr - EEType to log +// * typeLogBehavior - Ignored in Redhawk builds +// + +void BulkTypeEventLogger::LogTypeAndParameters(UInt64 thAsAddr, ETW::TypeSystemLog::TypeLogBehavior typeLogBehavior) +{ + if (!ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + return; + } + + EEType * pEEType = (EEType *) thAsAddr; + + // Batch up this type. 
This grabs useful info about the type, including any + // type parameters it may have, and sticks it in m_rgBulkTypeValues + int iBulkTypeEventData = LogSingleType(pEEType); + if (iBulkTypeEventData == -1) + { + // There was a failure trying to log the type, so don't bother with its type + // parameters + return; + } + + // Look at the type info we just batched, so we can get the type parameters + BulkTypeValue * pVal = &m_rgBulkTypeValues[iBulkTypeEventData]; + + // We're about to recursively call ourselves for the type parameters, so make a + // local copy of their type handles first (else, as we log them we could flush + // and clear out m_rgBulkTypeValues, thus trashing pVal) + NewArrayHolder rgTypeParameters; + DWORD cTypeParams = pVal->cTypeParameters; + if (cTypeParams == 1) + { + ETW::TypeSystemLog::LogTypeAndParametersIfNecessary(this, pVal->ullSingleTypeParameter, typeLogBehavior); + } + else if (cTypeParams > 1) + { + rgTypeParameters = new (nothrow) ULONGLONG[cTypeParams]; + for (DWORD i=0; i < cTypeParams; i++) + { + rgTypeParameters[i] = pVal->rgTypeParameters[i]; + } + + // Recursively log any referenced parameter types + for (DWORD i=0; i < cTypeParams; i++) + { + ETW::TypeSystemLog::LogTypeAndParametersIfNecessary(this, rgTypeParameters[i], typeLogBehavior); + } + } +} + +// We keep a hash of these to keep track of: +// * Which types have been logged through ETW (so we can avoid logging dupe Type +// events), and +// * GCSampledObjectAllocation stats to help with "smart sampling" which +// dynamically adjusts sampling rate of objects by type. +// See code:LoggedTypesFromModuleTraits + +class LoggedTypesTraits : public DefaultSHashTraits +{ +public: + + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef EEType* key_t; + + static key_t GetKey(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return e; + } + + static BOOL Equals(key_t k1, key_t k2) + { + LIMITED_METHOD_CONTRACT; + return (k1 == k2); + } + + static count_t Hash(key_t k) + { + LIMITED_METHOD_CONTRACT; + return (count_t) (UIntNative) k; + } + + static bool IsNull(const element_t &e) + { + LIMITED_METHOD_CONTRACT; + return (e == NULL); + } + + static const element_t Null() + { + LIMITED_METHOD_CONTRACT; + return NULL; + } +}; + +enum class CorElementType : UInt8 +{ + ELEMENT_TYPE_END = 0x0, + + ELEMENT_TYPE_BOOLEAN = 0x2, + ELEMENT_TYPE_CHAR = 0x3, + ELEMENT_TYPE_I1 = 0x4, + ELEMENT_TYPE_U1 = 0x5, + ELEMENT_TYPE_I2 = 0x6, + ELEMENT_TYPE_U2 = 0x7, + ELEMENT_TYPE_I4 = 0x8, + ELEMENT_TYPE_U4 = 0x9, + ELEMENT_TYPE_I8 = 0xa, + ELEMENT_TYPE_U8 = 0xb, + ELEMENT_TYPE_R4 = 0xc, + ELEMENT_TYPE_R8 = 0xd, + + ELEMENT_TYPE_I = 0x18, + ELEMENT_TYPE_U = 0x19, +}; + +static CorElementType ElementTypeToCorElementType(EETypeElementType elementType) +{ + switch (elementType) + { + case EETypeElementType::ElementType_Boolean: + return CorElementType::ELEMENT_TYPE_BOOLEAN; + case EETypeElementType::ElementType_Char: + return CorElementType::ELEMENT_TYPE_CHAR; + case EETypeElementType::ElementType_SByte: + return CorElementType::ELEMENT_TYPE_I1; + case EETypeElementType::ElementType_Byte: + return CorElementType::ELEMENT_TYPE_U1; + case EETypeElementType::ElementType_Int16: + return CorElementType::ELEMENT_TYPE_I2; + case EETypeElementType::ElementType_UInt16: + return CorElementType::ELEMENT_TYPE_U2; + case EETypeElementType::ElementType_Int32: + return CorElementType::ELEMENT_TYPE_I4; + case EETypeElementType::ElementType_UInt32: + return 
CorElementType::ELEMENT_TYPE_U4; + case EETypeElementType::ElementType_Int64: + return CorElementType::ELEMENT_TYPE_I8; + case EETypeElementType::ElementType_UInt64: + return CorElementType::ELEMENT_TYPE_U8; + case EETypeElementType::ElementType_Single: + return CorElementType::ELEMENT_TYPE_R4; + case EETypeElementType::ElementType_Double: + return CorElementType::ELEMENT_TYPE_R8; + case EETypeElementType::ElementType_IntPtr: + return CorElementType::ELEMENT_TYPE_I; + case EETypeElementType::ElementType_UIntPtr: + return CorElementType::ELEMENT_TYPE_U; + } + return CorElementType::ELEMENT_TYPE_END; +} + +// Avoid reporting the same type twice by keeping a hash of logged types. +SHash* s_loggedTypesHash = NULL; + +//--------------------------------------------------------------------------------------- +// +// Interrogates EEType for the info that's interesting to include in the BulkType ETW +// event. Does not recursively call self for type parameters. +// +// Arguments: +// * pEEType - EEType to log info about +// +// Return Value: +// Index into internal array where the info got batched. Or -1 if there was a +// failure. +// + +int BulkTypeEventLogger::LogSingleType(EEType * pEEType) +{ +#ifdef MULTIPLE_HEAPS + // We need to add a lock to protect the types hash for Server GC. + ASSERT_UNCONDITIONALLY("Add a lock to protect s_loggedTypesHash access!"); +#endif + //Avoid logging the same type twice, but using the hash of loggged types. + if (s_loggedTypesHash == NULL) + s_loggedTypesHash = new SHash(); + EEType* preexistingType = s_loggedTypesHash->Lookup(pEEType); + if (preexistingType != NULL) + { + return -1; + } + else + { + s_loggedTypesHash->Add(pEEType); + } + + // If there's no room for another type, flush what we've got + if (m_nBulkTypeValueCount == _countof(m_rgBulkTypeValues)) + { + FireBulkTypeEvent(); + } + + _ASSERTE(m_nBulkTypeValueCount < _countof(m_rgBulkTypeValues)); + + BulkTypeValue * pVal = &m_rgBulkTypeValues[m_nBulkTypeValueCount]; + + // Clear out pVal before filling it out (array elements can get reused if there + // are enough types that we need to flush to multiple events). + pVal->Clear(); + + pVal->fixedSizedData.TypeID = (ULONGLONG) pEEType; + pVal->fixedSizedData.Flags = kEtwTypeFlagsModuleBaseAddress; + pVal->fixedSizedData.CorElementType = (BYTE)ElementTypeToCorElementType(pEEType->GetElementType()); + + ULONGLONG * rgTypeParamsForEvent = NULL; + ULONGLONG typeParamForNonGenericType = 0; + + // Determine this EEType's module. + RuntimeInstance * pRuntimeInstance = GetRuntimeInstance(); + + ULONGLONG osModuleHandle = (ULONGLONG) pEEType->GetTypeManagerPtr()->AsTypeManager()->GetOsModuleHandle(); + + pVal->fixedSizedData.ModuleID = osModuleHandle; + + if (pEEType->IsParameterizedType()) + { + ASSERT(pEEType->IsArray()); + // Array + pVal->fixedSizedData.Flags |= kEtwTypeFlagsArray; + pVal->cTypeParameters = 1; + pVal->ullSingleTypeParameter = (ULONGLONG) pEEType->get_RelatedParameterType(); + } + else + { + // Note: if pEEType->IsCloned(), then no special handling is necessary. All the + // functionality we need from the EEType below work just as well from cloned types. + + // Note: For generic types, we do not necessarily know the generic parameters. + // So we leave it to the profiler at post-processing time to determine that via + // the PDBs. We'll leave pVal->cTypeParameters as 0, even though there could be + // type parameters. 
+ + // Flags + if (pEEType->HasFinalizer()) + { + pVal->fixedSizedData.Flags |= kEtwTypeFlagsFinalizable; + } + + // Note: Pn runtime knows nothing about delegates, and there are no CCWs/RCWs. + // So no other type flags are applicable to set + } + + ULONGLONG rvaType = osModuleHandle == 0 ? 0 : (ULONGLONG(pEEType) - osModuleHandle); + pVal->fixedSizedData.TypeNameID = (DWORD) rvaType; + + // Now that we know the full size of this type's data, see if it fits in our + // batch or whether we need to flush + + int cbVal = pVal->GetByteCountInEvent(); + if (cbVal > kMaxBytesTypeValues) + { + // This type is apparently so huge, it's too big to squeeze into an event, even + // if it were the only type batched in the whole event. Bail + ASSERT(!"Type too big to log via ETW"); + return -1; + } + + if (m_nBulkTypeValueByteCount + cbVal > kMaxBytesTypeValues) + { + // Although this type fits into the array, its size is so big that the entire + // array can't be logged via ETW. So flush the array, and start over by + // calling ourselves--this refetches the type info and puts it at the + // beginning of the array. Since we know this type is small enough to be + // batched into an event on its own, this recursive call will not try to + // call itself again. + FireBulkTypeEvent(); + return LogSingleType(pEEType); + } + + // The type fits into the batch, so update our state + m_nBulkTypeValueCount++; + m_nBulkTypeValueByteCount += cbVal; + return m_nBulkTypeValueCount - 1; // Index of type we just added +} + + +void BulkTypeEventLogger::Cleanup() +{ + if (s_loggedTypesHash != NULL) + { + delete s_loggedTypesHash; + s_loggedTypesHash = NULL; + } +} + +#endif // defined(FEATURE_EVENT_TRACE) + + +//--------------------------------------------------------------------------------------- +// +// Outermost level of ETW-type-logging. Clients outside (rh)eventtrace.cpp call this to log +// an EETypes and (recursively) its type parameters when present. This guy then calls +// into the appropriate BulkTypeEventLogger to do the batching and logging +// +// Arguments: +// * pBulkTypeEventLogger - If our caller is keeping track of batched types, it +// passes this to us so we can use it to batch the current type (GC heap walk +// does this). In Redhawk builds this should not be NULL. +// * thAsAddr - EEType to batch +// * typeLogBehavior - Unused in Redhawk builds +// + +void ETW::TypeSystemLog::LogTypeAndParametersIfNecessary(BulkTypeEventLogger * pLogger, UInt64 thAsAddr, ETW::TypeSystemLog::TypeLogBehavior typeLogBehavior) +{ +#if defined(FEATURE_EVENT_TRACE) + + if (!ETW_TRACING_CATEGORY_ENABLED( + MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, + TRACE_LEVEL_INFORMATION, + CLR_TYPE_KEYWORD)) + { + return; + } + + _ASSERTE(pLogger != NULL); + pLogger->LogTypeAndParameters(thAsAddr, typeLogBehavior); + +#endif // defined(FEATURE_EVENT_TRACE) +} + + +//--------------------------------------------------------------------------------------- +// Runtime helpers for ETW logging. 
+//--------------------------------------------------------------------------------------- +typedef enum +{ + EVENT_LOG_CCW = 1, + EVENT_LOG_RCW, + EVENT_FLUSH_COM +} COM_ETW_EVENTS; + + + +COOP_PINVOKE_HELPER(void, RhpETWLogLiveCom, (Int32 eventType, void* CCWGCHandle, void* objectID, void* typeRawValue, void* IUnknown, void* VTable, Int32 comRefCount, Int32 jupiterRefCount, Int32 flags)) +{ + switch (eventType) + { + case EVENT_LOG_CCW: + BulkComLogger::WriteCCW(CCWGCHandle, objectID, typeRawValue, IUnknown, comRefCount, jupiterRefCount, flags); + break; + case EVENT_LOG_RCW: + BulkComLogger::WriteRCW(objectID, typeRawValue, IUnknown, VTable, comRefCount, flags); + break; + case EVENT_FLUSH_COM: + BulkComLogger::FlushComETW(); + break; + default: + ASSERT_UNCONDITIONALLY("unexpected COM ETW Event ID"); + } +} + +COOP_PINVOKE_HELPER(bool, RhpETWShouldWalkCom, ()) +{ + return BulkComLogger::ShouldReportComForGCHeapEtw(); +} + +//--------------------------------------------------------------------------------------- +// BulkStaticsLogger: Batches up and logs static variable roots +//--------------------------------------------------------------------------------------- + +BulkComLogger* BulkComLogger::s_comLogger; + +BulkComLogger::BulkComLogger() + : m_currRcw(0), m_currCcw(0), m_etwRcwData(0), m_etwCcwData(0) +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + m_etwRcwData = new EventRCWEntry[kMaxRcwCount]; + m_etwCcwData = new EventCCWEntry[kMaxCcwCount]; +} + + +BulkComLogger::~BulkComLogger() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + FireBulkComEvent(); + + if (m_etwRcwData) + delete[] m_etwRcwData; + + if (m_etwCcwData) + delete[] m_etwCcwData; +} + +bool BulkComLogger::ShouldReportComForGCHeapEtw() +{ + return ETW::GCLog::ShouldWalkHeapObjectsForEtw(); +} + +void BulkComLogger::WriteCCW(void* CCWGCHandle, void* objectID, void* typeRawValue, void* IUnknown, long comRefCount, long jupiterRefCount, long flags) +{ + EventCCWEntry ccwEntry; + + ccwEntry.RootID = (UInt64)CCWGCHandle; + ccwEntry.ObjectID = (UInt64) objectID; + ccwEntry.TypeID = (UInt64) typeRawValue; + ccwEntry.IUnk = (UInt64) IUnknown; + ccwEntry.RefCount = (ULONG) comRefCount; + ccwEntry.JupiterRefCount = (ULONG) jupiterRefCount; + ccwEntry.Flags = flags; + + BulkComLogger* comLogger = BulkComLogger::GetInstance(); + if (comLogger != NULL) + { + comLogger->WriteCcw(ccwEntry); + } +} + +void BulkComLogger::WriteRCW(void* objectID, void* typeRawValue, void* IUnknown, void* VTable, long comRefCount, long flags) +{ + EventRCWEntry rcwEntry; + + rcwEntry.ObjectID = (UInt64) objectID; + rcwEntry.TypeID = (UInt64) typeRawValue; + rcwEntry.IUnk = (UInt64) IUnknown; + rcwEntry.VTable = (UInt64) VTable; + rcwEntry.RefCount = comRefCount; + rcwEntry.Flags = flags; + + BulkComLogger* comLogger = BulkComLogger::GetInstance(); + if (comLogger != NULL) + { + comLogger->WriteRcw(rcwEntry); + } +} + +void BulkComLogger::FlushComETW() +{ + BulkComLogger* comLogger = BulkComLogger::GetInstance(); + if (comLogger != NULL) + comLogger->Cleanup(); +} + +void BulkComLogger::FireBulkComEvent() +{ + WRAPPER_NO_CONTRACT; + + FlushRcw(); + FlushCcw(); +} + + +BulkComLogger* BulkComLogger::GetInstance() +{ + if (s_comLogger == NULL) + { + s_comLogger = new BulkComLogger(); + } + + return s_comLogger; +} + +void BulkComLogger::Cleanup() +{ + if (s_comLogger != NULL) + { + delete s_comLogger; + s_comLogger = NULL; + } +} + +void BulkComLogger::WriteCcw(const EventCCWEntry& ccw) +{ 
+ CONTRACTL + { + THROWS; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_currCcw < kMaxCcwCount); + + EventCCWEntry &mccw = m_etwCcwData[m_currCcw++]; + mccw = ccw; + + if (m_currCcw >= kMaxCcwCount) + FlushCcw(); +} + +void BulkComLogger::FlushCcw() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_currCcw <= kMaxCcwCount); + + if (m_currCcw == 0) + return; + + unsigned short instance = GetClrInstanceId(); + + EVENT_DATA_DESCRIPTOR eventData[3]; + EventDataDescCreate(&eventData[0], &m_currCcw, sizeof(const unsigned int)); + EventDataDescCreate(&eventData[1], &instance, sizeof(const unsigned short)); + EventDataDescCreate(&eventData[2], m_etwCcwData, sizeof(EventCCWEntry) * m_currCcw); + + ULONG result = PalEventWrite(Microsoft_Windows_DotNETRuntimeHandle, &GCBulkRootCCW, _countof(eventData), eventData); + _ASSERTE(result == ERROR_SUCCESS); + + m_currCcw = 0; +} + +void BulkComLogger::WriteRcw(const EventRCWEntry& rcw) +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_currRcw < kMaxRcwCount); + + EventRCWEntry &mrcw = m_etwRcwData[m_currRcw]; + mrcw = rcw; + + if (++m_currRcw >= kMaxRcwCount) + FlushRcw(); +} + +void BulkComLogger::FlushRcw() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + _ASSERTE(m_currRcw <= kMaxRcwCount); + + if (m_currRcw == 0) + return; + + unsigned short instance = GetClrInstanceId(); + + EVENT_DATA_DESCRIPTOR eventData[3]; + EventDataDescCreate(&eventData[0], &m_currRcw, sizeof(const unsigned int)); + EventDataDescCreate(&eventData[1], &instance, sizeof(const unsigned short)); + EventDataDescCreate(&eventData[2], m_etwRcwData, sizeof(EventRCWEntry) * m_currRcw); + + ULONG result = PalEventWrite(Microsoft_Windows_DotNETRuntimeHandle, &GCBulkRCW, _countof(eventData), eventData); + _ASSERTE(result == ERROR_SUCCESS); + + m_currRcw = 0; +} + +COOP_PINVOKE_HELPER(void, RhpEtwExceptionThrown, (LPCWSTR exceptionTypeName, LPCWSTR exceptionMessage, void* faultingIP, HRESULT hresult)) +{ + FireEtwExceptionThrown_V1(exceptionTypeName, + exceptionMessage, + faultingIP, + hresult, + 0, + GetClrInstanceId()); +} + + + diff --git a/src/coreclr/src/nativeaot/Runtime/rheventtrace.h b/src/coreclr/src/nativeaot/Runtime/rheventtrace.h new file mode 100644 index 0000000000000..33c7c7bf03968 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/rheventtrace.h @@ -0,0 +1,182 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This header provides Redhawk-specific ETW code and macros, to allow sharing of common +// ETW code between Redhawk and desktop CLR. +// +#ifndef __RHEVENTTRACE_INCLUDED +#define __RHEVENTTRACE_INCLUDED + + +#ifdef FEATURE_ETW + +// FireEtwGCPerHeapHistorySpecial() has to be defined manually rather than via the manifest because it does +// not have a standard signature. +#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrId) (MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context.IsEnabled && PalEventEnabled(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCPerHeapHistory)) ? Template_GCPerHeapHistorySpecial(Microsoft_Windows_Redhawk_GC_PrivateHandle, &GCPerHeapHistory, DataPerHeap, DataSize, ClrId) : 0 + +// Map the CLR private provider to our version so we can avoid inserting more #ifdef's in the code. 
+#define MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context MICROSOFT_WINDOWS_REDHAWK_GC_PRIVATE_PROVIDER_Context +#define MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context MICROSOFT_WINDOWS_REDHAWK_GC_PUBLIC_PROVIDER_Context +#define Microsoft_Windows_DotNETRuntimeHandle Microsoft_Windows_Redhawk_GC_PublicHandle + +#define CLR_GC_KEYWORD 0x1 +#define CLR_FUSION_KEYWORD 0x4 +#define CLR_LOADER_KEYWORD 0x8 +#define CLR_JIT_KEYWORD 0x10 +#define CLR_NGEN_KEYWORD 0x20 +#define CLR_STARTENUMERATION_KEYWORD 0x40 +#define CLR_ENDENUMERATION_KEYWORD 0x80 +#define CLR_SECURITY_KEYWORD 0x400 +#define CLR_APPDOMAINRESOURCEMANAGEMENT_KEYWORD 0x800 +#define CLR_JITTRACING_KEYWORD 0x1000 +#define CLR_INTEROP_KEYWORD 0x2000 +#define CLR_CONTENTION_KEYWORD 0x4000 +#define CLR_EXCEPTION_KEYWORD 0x8000 +#define CLR_THREADING_KEYWORD 0x10000 +#define CLR_JITTEDMETHODILTONATIVEMAP_KEYWORD 0x20000 +#define CLR_OVERRIDEANDSUPPRESSNGENEVENTS_KEYWORD 0x40000 +#define CLR_TYPE_KEYWORD 0x80000 +#define CLR_GCHEAPDUMP_KEYWORD 0x100000 +#define CLR_GCHEAPALLOC_KEYWORD 0x200000 +#define CLR_GCHEAPSURVIVALANDMOVEMENT_KEYWORD 0x400000 +#define CLR_GCHEAPCOLLECT_KEYWORD 0x800000 +#define CLR_GCHEAPANDTYPENAMES_KEYWORD 0x1000000 +#define CLR_PERFTRACK_KEYWORD 0x20000000 +#define CLR_STACK_KEYWORD 0x40000000 +#ifndef ERROR_SUCCESS +#define ERROR_SUCCESS 0 +#endif + +#undef ETW_TRACING_INITIALIZED +#define ETW_TRACING_INITIALIZED(RegHandle) (RegHandle != NULL) + +#undef ETW_CATEGORY_ENABLED +#define ETW_CATEGORY_ENABLED(Context, LevelParam, Keyword) \ + (Context.IsEnabled && \ + ( \ + (LevelParam <= ((Context).Level)) || \ + ((Context.Level) == 0) \ + ) && \ + ( \ + (Keyword == (ULONGLONG)0) || \ + ( \ + (Keyword & (Context.MatchAnyKeyword)) && \ + ( \ + (Keyword & (Context.MatchAllKeyword)) == (Context.MatchAllKeyword) \ + ) \ + ) \ + ) \ + ) + +class EEType; +class BulkTypeEventLogger; + +namespace ETW +{ + // Class to wrap all type system logic for ETW + class TypeSystemLog + { + public: + // This enum is unused on Redhawk, but remains here to keep Redhawk / desktop CLR + // code shareable. + enum TypeLogBehavior + { + kTypeLogBehaviorTakeLockAndLogIfFirstTime, + kTypeLogBehaviorAssumeLockAndLogIfFirstTime, + kTypeLogBehaviorAlwaysLog, + }; + + static void LogTypeAndParametersIfNecessary(BulkTypeEventLogger * pLogger, UInt64 thAsAddr, TypeLogBehavior typeLogBehavior); + }; +}; + +struct EventRCWEntry +{ + UInt64 ObjectID; + UInt64 TypeID; + UInt64 IUnk; + UInt64 VTable; + UInt32 RefCount; + UInt32 Flags; +}; + +#pragma pack(push, 1) +struct EventCCWEntry +{ + UInt64 RootID; + UInt64 ObjectID; + UInt64 TypeID; + UInt64 IUnk; + UInt32 RefCount; + UInt32 JupiterRefCount; + UInt32 Flags; +}; + +C_ASSERT(sizeof(EventCCWEntry) == 44); +#pragma pack(pop) + +const UInt32 cbComMaxEtwEvent = 64 * 1024; + +// Does all logging for RCWs and CCWs in the process. +class BulkComLogger +{ +public: + // Returns true is gc heap collection is on. + static bool ShouldReportComForGCHeapEtw(); + + // Write one CCW to the CCW buffer. + static void WriteCCW(void* CCWGCHandle, void* objectID, void* typeRawValue, void* IUnknown, long comRefCount, long jupiterRefCount, long flags); + + // Write one RCW to the RCW buffer. + static void WriteRCW(void* objectID, void* typeRawValue, void* IUnknown, void* VTable, long refCount, long flags); + + // Gets or creates a unique BulkComLogger instance + static BulkComLogger* GetInstance(); + + // Write the remaining events and deletes the static instance. 
+ static void FlushComETW(); + +private: + BulkComLogger(); + ~BulkComLogger(); + + // Forces a flush of all ETW events not yet fired. + void FireBulkComEvent(); + + // Writes one RCW to the RCW buffer. May or may not fire the event. + void WriteRcw(const EventRCWEntry& rcw); + + // Writes one CCW to the CCW buffer. May or may not fire the event. + void WriteCcw(const EventCCWEntry& ccw); + + // Forces a flush of all RCW ETW events not yet fired. + void FlushRcw(); + + // Forces a flush of all CCW ETW events not yet fired. + void FlushCcw(); + + // Distroys the unique instance and forces a flush for all ETW events not yet fired. + void Cleanup(); + +private: + // The maximum number of RCW/CCW events we can batch up based on the max size of an ETW event. + static const int kMaxRcwCount = (cbComMaxEtwEvent - 0x30) / sizeof(EventRCWEntry); + static const int kMaxCcwCount = (cbComMaxEtwEvent - 0x30) / sizeof(EventCCWEntry); + + int m_currRcw; // The current number of batched (but not emitted) RCW events. + int m_currCcw; // The current number of batched (but not emitted) CCW events. + + BulkTypeEventLogger *m_typeLogger; // Type logger to emit type data for. + + EventRCWEntry *m_etwRcwData; // RCW buffer. + EventCCWEntry *m_etwCcwData; // CCW buffer. + + static BulkComLogger* s_comLogger; +}; + +#else +#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrId) +#endif + +#endif //__RHEVENTTRACE_INCLUDED diff --git a/src/coreclr/src/nativeaot/Runtime/sha1.cpp b/src/coreclr/src/nativeaot/Runtime/sha1.cpp new file mode 100644 index 0000000000000..ca5a8e262edd2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/sha1.cpp @@ -0,0 +1,380 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// +// +// +// =========================================================================== +// File: sha1.cpp +// +// =========================================================================== +/*++ + +Abstract: + + SHA-1 implementation + +Revision History: + +--*/ + +/* + File sha1.cpp Version 03 August 2000. + + + This implements the SHA-1 hash function. + For algorithmic background see (for example) + + + Alfred J. Menezes et al + Handbook of Applied Cryptography + The CRC Press Series on Discrete Mathematics + and its Applications + CRC Press LLC, 1997 + ISBN 0-8495-8523-7 + QA76.9A25M643 + + Also see FIPS 180-1 - Secure Hash Standard, + 1993 May 11 and 1995 April 17, by the U.S. + National Institute of Standards and Technology (NIST). + +*/ + + +#include "common.h" +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" + +#include "sha1.h" + + +#define ROTATE32L(x,n) rotate32l(x,n) +#define SHAVE32(x) (UInt32)(x) + +static UInt32 rotate32l(UInt32 val, int shift) +{ + shift &= 0x1f; + val = (val >> (0x20 - shift)) | (val << shift); + return val; +} + +static void SHA1_block(SHA1_CTX *ctx) +/* + Update the SHA-1 hash from a fresh 64 bytes of data. +*/ +{ + static const UInt32 sha1_round1 = 0x5A827999u; + static const UInt32 sha1_round2 = 0x6ED9EBA1u; + static const UInt32 sha1_round3 = 0x8F1BBCDCu; + static const UInt32 sha1_round4 = 0xCA62C1D6u; + + UInt32 a = ctx->partial_hash[0], b = ctx->partial_hash[1]; + UInt32 c = ctx->partial_hash[2], d = ctx->partial_hash[3]; + UInt32 e = ctx->partial_hash[4]; + UInt32 msg80[80]; + int i; + + // OACR note: + // Loop conditions are using (i <= limit - increment) instead of (i < limit) to satisfy OACR. 
When the increment is greater + // than 1, OACR incorrectly thinks that the max value of 'i' is (limit - 1). + + for (i = 0; i < 16; i++) { // Copy to local array, zero original + // Extend length to 80 + const UInt32 datval = ctx->awaiting_data[i]; + ctx->awaiting_data[i] = 0; + msg80[i] = datval; + } + + for (i = 16; i <= 80 - 2; i += 2) { + const UInt32 temp1 = msg80[i-3] ^ msg80[i-8] + ^ msg80[i-14] ^ msg80[i-16]; + const UInt32 temp2 = msg80[i-2] ^ msg80[i-7] + ^ msg80[i-13] ^ msg80[i-15]; + msg80[i ] = ROTATE32L(temp1, 1); + msg80[i+1] = ROTATE32L(temp2, 1); + } + +#define ROUND1(B, C, D) ((D ^ (B & (C ^ D))) + sha1_round1) + // Equivalent to (B & C) | (~B & D). + // (check cases B = 0 and B = 1) +#define ROUND2(B, C, D) ((B ^ C ^ D) + sha1_round2) + +#define ROUND3(B, C, D) ((C & (B | D) | (B & D)) + sha1_round3) + +#define ROUND4(B, C, D) ((B ^ C ^ D) + sha1_round4) + +// Round 1 + for (i = 0; i <= 20 - 5; i += 5) { + e += ROTATE32L(a, 5) + ROUND1(b, c, d) + msg80[i]; + b = ROTATE32L(b, 30); + + d += ROTATE32L(e, 5) + ROUND1(a, b, c) + msg80[i+1]; + a = ROTATE32L(a, 30); + + c += ROTATE32L(d, 5) + ROUND1(e, a, b) + msg80[i+2]; + e = ROTATE32L(e, 30); + + b += ROTATE32L(c, 5) + ROUND1(d, e, a) + msg80[i+3]; + d = ROTATE32L(d, 30); + + a += ROTATE32L(b, 5) + ROUND1(c, d, e) + msg80[i+4]; + c = ROTATE32L(c, 30); +#if 0 + printf("i = %ld %08lx %08lx %08lx %08lx %08lx\n", + i, a, b, c, d, e); +#endif + } // for i + +// Round 2 + for (i = 20; i <= 40 - 5; i += 5) { + e += ROTATE32L(a, 5) + ROUND2(b, c, d) + msg80[i]; + b = ROTATE32L(b, 30); + + d += ROTATE32L(e, 5) + ROUND2(a, b, c) + msg80[i+1]; + a = ROTATE32L(a, 30); + + c += ROTATE32L(d, 5) + ROUND2(e, a, b) + msg80[i+2]; + e = ROTATE32L(e, 30); + + b += ROTATE32L(c, 5) + ROUND2(d, e, a) + msg80[i+3]; + d = ROTATE32L(d, 30); + + a += ROTATE32L(b, 5) + ROUND2(c, d, e) + msg80[i+4]; + c = ROTATE32L(c, 30); + } // for i + +// Round 3 + for (i = 40; i <= 60 - 5; i += 5) { + e += ROTATE32L(a, 5) + ROUND3(b, c, d) + msg80[i]; + b = ROTATE32L(b, 30); + + d += ROTATE32L(e, 5) + ROUND3(a, b, c) + msg80[i+1]; + a = ROTATE32L(a, 30); + + c += ROTATE32L(d, 5) + ROUND3(e, a, b) + msg80[i+2]; + e = ROTATE32L(e, 30); + + b += ROTATE32L(c, 5) + ROUND3(d, e, a) + msg80[i+3]; + d = ROTATE32L(d, 30); + + a += ROTATE32L(b, 5) + ROUND3(c, d, e) + msg80[i+4]; + c = ROTATE32L(c, 30); + } // for i + +// Round 4 + for (i = 60; i <= 80 - 5; i += 5) { + e += ROTATE32L(a, 5) + ROUND4(b, c, d) + msg80[i]; + b = ROTATE32L(b, 30); + + d += ROTATE32L(e, 5) + ROUND4(a, b, c) + msg80[i+1]; + a = ROTATE32L(a, 30); + + c += ROTATE32L(d, 5) + ROUND4(e, a, b) + msg80[i+2]; + e = ROTATE32L(e, 30); + + b += ROTATE32L(c, 5) + ROUND4(d, e, a) + msg80[i+3]; + d = ROTATE32L(d, 30); + + a += ROTATE32L(b, 5) + ROUND4(c, d, e) + msg80[i+4]; + c = ROTATE32L(c, 30); + } // for i + +#undef ROUND1 +#undef ROUND2 +#undef ROUND3 +#undef ROUND4 + + ctx->partial_hash[0] += a; + ctx->partial_hash[1] += b; + ctx->partial_hash[2] += c; + ctx->partial_hash[3] += d; + ctx->partial_hash[4] += e; +#if 0 + for (i = 0; i < 16; i++) { + printf("%8lx ", msg16[i]); + if ((i & 7) == 7) printf("\n"); + } + printf("a, b, c, d, e = %08lx %08lx %08lx %08lx %08lx\n", + a, b, c, d, e); + printf("Partial hash = %08lx %08lx %08lx %08lx %08lx\n", + (long)ctx->partial_hash[0], (long)ctx->partial_hash[1], + (long)ctx->partial_hash[2], (long)ctx->partial_hash[3], + (long)ctx->partial_hash[4]); +#endif +} // end SHA1_block + + +void SHA1Hash::SHA1Init(SHA1_CTX *ctx) +{ + ctx->nbit_total[0] = ctx->nbit_total[1] = 
0; + + for (UInt32 i = 0; i != 16; i++) { + ctx->awaiting_data[i] = 0; + } + + /* + Initialize hash variables. + + */ + + ctx->partial_hash[0] = 0x67452301u; + ctx->partial_hash[1] = 0xefcdab89u; + ctx->partial_hash[2] = ~ctx->partial_hash[0]; + ctx->partial_hash[3] = ~ctx->partial_hash[1]; + ctx->partial_hash[4] = 0xc3d2e1f0u; + +} + +void SHA1Hash::SHA1Update( + SHA1_CTX * ctx, // IN/OUT + const UInt8 * msg, // IN + UInt32 nbyte) // IN +/* + Append data to a partially hashed SHA-1 message. +*/ +{ + const UInt8 *fresh_data = msg; + UInt32 nbyte_left = nbyte; + UInt32 nbit_occupied = ctx->nbit_total[0] & 511; + UInt32 *awaiting_data; + const UInt32 nbitnew_low = SHAVE32(8*nbyte); + + + ASSERT((nbit_occupied & 7) == 0); // Partial bytes not implemented + + ctx->nbit_total[0] += nbitnew_low; + ctx->nbit_total[1] += (nbyte >> 29) + + (SHAVE32(ctx->nbit_total[0]) < nbitnew_low); + + /* Advance to word boundary in waiting_data */ + + if ((nbit_occupied & 31) != 0) { + awaiting_data = ctx->awaiting_data + nbit_occupied/32; + + while ((nbit_occupied & 31) != 0 && nbyte_left != 0) { + nbit_occupied += 8; + *awaiting_data |= (UInt32)*fresh_data++ + << ((-(int)nbit_occupied) & 31); + nbyte_left--; // Start at most significant byte + } + } // if nbit_occupied + + /* Transfer 4 bytes at a time */ + + do { + const UInt32 nword_occupied = nbit_occupied/32; + UInt32 nwcopy = min(nbyte_left/4, 16 - nword_occupied); + ASSERT (nbit_occupied <= 512); + ASSERT ((nbit_occupied & 31) == 0 || nbyte_left == 0); + awaiting_data = ctx->awaiting_data + nword_occupied; + nbyte_left -= 4*nwcopy; + nbit_occupied += 32*nwcopy; + + while (nwcopy != 0) { + const UInt32 byte0 = (UInt32)fresh_data[0]; + const UInt32 byte1 = (UInt32)fresh_data[1]; + const UInt32 byte2 = (UInt32)fresh_data[2]; + const UInt32 byte3 = (UInt32)fresh_data[3]; + *awaiting_data++ = byte3 | (byte2 << 8) + | (byte1 << 16) | (byte0 << 24); + /* Big endian */ + fresh_data += 4; + nwcopy--; + } + + if (nbit_occupied == 512) { + SHA1_block(ctx); + nbit_occupied = 0; + awaiting_data -= 16; + ASSERT(awaiting_data == ctx->awaiting_data); + } + } while (nbyte_left >= 4); + + ASSERT (ctx->awaiting_data + nbit_occupied/32 + == awaiting_data); + + while (nbyte_left != 0) { + const UInt32 new_byte = (UInt32)*fresh_data++; + + ASSERT((nbit_occupied & 31) <= 16); + nbit_occupied += 8; + *awaiting_data |= new_byte << ((-(int)nbit_occupied) & 31); + nbyte_left--; + } + + ASSERT (nbit_occupied == (ctx->nbit_total[0] & 511)); +} // end SHA1Update + + + +void SHA1Hash::SHA1Final( + SHA1_CTX * ctx, // IN/OUT + UInt8 * digest) // OUT +/* + Finish a SHA-1 hash. +*/ +{ + const UInt32 nbit0 = ctx->nbit_total[0]; + const UInt32 nbit1 = ctx->nbit_total[1]; + UInt32 nbit_occupied = nbit0 & 511; + UInt32 i; + + ASSERT((nbit_occupied & 7) == 0); + + ctx->awaiting_data[nbit_occupied/32] + |= (UInt32)0x80 << ((-8-nbit_occupied) & 31); + // Append a 1 bit + nbit_occupied += 8; + + + // Append zero bits until length (in bits) is 448 mod 512. + // Then append the length, in bits. + // Here we assume the buffer was zeroed earlier. 
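// ---------------------------------------------------------------------------
// [Editorial aside, not part of the patch] The padding arithmetic described in
// the comment above, as a hypothetical standalone helper: one 0x80 marker
// byte, zero fill until the length is 448 mod 512 bits, then the 64-bit bit
// length, so the padded message is always a whole number of 64-byte blocks.
static inline size_t Sha1PaddedSizeBytes(size_t nbytes)
{
    // 1 marker byte + 8 length bytes, rounded up to the next 64-byte block.
    return ((nbytes + 1 + 8 + 63) / 64) * 64;
}
// Example: a 55-byte message pads to one 64-byte block; a 56-byte message
// needs a second block and pads to 128 bytes.
// ---------------------------------------------------------------------------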
+ + if (nbit_occupied > 448) { // If fewer than 64 bits left + SHA1_block(ctx); + nbit_occupied = 0; + } + ctx->awaiting_data[14] = nbit1; + ctx->awaiting_data[15] = nbit0; + SHA1_block(ctx); + + /* Copy final digest to user-supplied byte array */ + + for (i = 0; i != 5; i++) { + const UInt32 dwi = ctx->partial_hash[i]; + digest[4*i + 0] = (UInt8)((dwi >> 24) & 255); + digest[4*i + 1] = (UInt8)((dwi >> 16) & 255); + digest[4*i + 2] = (UInt8)((dwi >> 8) & 255); + digest[4*i + 3] = (UInt8)(dwi & 255); // Big-endian + } +} // end SHA1Final + +SHA1Hash::SHA1Hash() +{ + m_fFinalized = false; + SHA1Init(&m_Context); +} + +void SHA1Hash::AddData(const UInt8 *pbData, UInt32 cbData) +{ + if (m_fFinalized) + return; + + SHA1Update(&m_Context, pbData, cbData); +} + +// Retrieve a pointer to the final hash. +UInt8 *SHA1Hash::GetHash() +{ + if (m_fFinalized) + return m_Value; + + SHA1Final(&m_Context, m_Value); + + m_fFinalized = true; + + return m_Value; +} + diff --git a/src/coreclr/src/nativeaot/Runtime/sha1.h b/src/coreclr/src/nativeaot/Runtime/sha1.h new file mode 100644 index 0000000000000..70f827dcde8e9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/sha1.h @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +#ifndef SHA1_H_ +#define SHA1_H_ + +// Hasher class, performs no allocation and therefore does not throw or return +// errors. Usage is as follows: +// Create an instance (this initializes the hash). +// Add one or more blocks of input data using AddData(). +// Retrieve the hash using GetHash(). This can be done as many times as desired +// until the object is destructed. Once a hash is asked for, further AddData +// calls will be ignored. There is no way to reset object state (simply +// destroy the object and create another instead). + +#define SHA1_HASH_SIZE 20 // Number of bytes output by SHA-1 + +typedef struct { + UInt32 magic_sha1; // Magic value for A_SHA_CTX + UInt32 awaiting_data[16]; + // Data awaiting full 512-bit block. + // Length (nbit_total[0] % 512) bits. + // Unused part of buffer (at end) is zero + UInt32 partial_hash[5]; + // Hash through last full block + UInt32 nbit_total[2]; + // Total length of message so far + // (bits, mod 2^64) +} SHA1_CTX; + +class SHA1Hash +{ +private: + SHA1_CTX m_Context; + UInt8 m_Value[SHA1_HASH_SIZE]; + bool m_fFinalized; + + void SHA1Init(SHA1_CTX*); + void SHA1Update(SHA1_CTX*, const UInt8*, const UInt32); + void SHA1Final(SHA1_CTX*, UInt8* digest); + +public: + SHA1Hash(); + void AddData(const UInt8 *pbData, UInt32 cbData); + UInt8 *GetHash(); +}; + +#endif // SHA1_H_ + + + diff --git a/src/coreclr/src/nativeaot/Runtime/shash.h b/src/coreclr/src/nativeaot/Runtime/shash.h new file mode 100644 index 0000000000000..65929f8d03d31 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/shash.h @@ -0,0 +1,634 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// SHash is a templated closed chaining hash table of pointers. It provides +// for multiple entries under the same key, and also for deleting elements. + +// Synchronization: +// Synchronization requirements depend on use. There are several properties to take into account: +// +// - Lookups may be asynchronous with each other +// - Lookups must be exclusive with Add operations +// (@todo: this can be remedied by delaying destruction of old tables during reallocation, e.g. 
during GC) +// - Remove operations may be asynchronous with Lookup/Add, unless elements are also deallocated. (In which +// case full synchronization is required) + + +// A SHash is templated by a class of TRAITS. These traits define the various specifics of the +// particular hash table. +// The required traits are: +// +// element_t Type of elements in the hash table. These elements are stored +// by value in the hash table. Elements must look more or less +// like primitives - they must support assignment relatively +// efficiently. There are 2 required sentinel values: +// Null and Deleted (described below). (Note that element_t is +// very commonly a pointer type.) +// +// The key must be derivable from the element; if your +// table's keys are independent of the stored values, element_t +// should be a key/value pair. +// +// key_t Type of the lookup key. The key is used for identity +// comparison between elements, and also as a key for lookup. +// This is also used by value and should support +// efficient assignment. +// +// count_t integral type for counts. Typically inherited by default +// Traits (count_t). +// +// static key_t GetKey(const element_t &e) Get key from element. Should be stable for a given e. +// static bool Equals(key_t k1, key_t k2) Compare 2 keys for equality. Again, should be stable. +// static count_t Hash(key_t k) Compute hash from a key. For efficient operation, the hashes +// for a set of elements should have random uniform distribution. +// +// static element_t Null() Return the Null sentinel value. May be inherited from +// default traits if it can be assigned from 0. +// static element_t Deleted() Return the Deleted sentinel value. May be inherited from the +// default traits if it can be assigned from -1. +// static bool IsNull(const ELEMENT &e) Compare element with Null sentinel value. May be inherited from +// default traits if it can be assigned from 0. +// static bool IsDeleted(const ELEMENT &e) Compare element with Deleted sentinel value. May be inherited +// from the default traits if it can be assigned from -1. +// static void OnFailure(FailureType failureType) Called when a failure occurs during SHash operation +// +// s_growth_factor_numerator +// s_growth_factor_denominator Factor to grow allocation (numerator/denominator). +// Typically inherited from default traits (3/2) +// +// s_density_factor_numerator +// s_density_factor_denominator Maximum occupied density of table before growth +// occurs (num/denom). Typically inherited (3/4). +// +// s_minimum_allocation Minimum table allocation count (size on first growth.) It is +// probably preferable to call Reallocate on initialization rather +// than override his from the default traits. (7) +// +// s_supports_remove Set to false for a slightly faster implementation that does not +// support deletes. There is a downside to the s_supports_remove flag, +// in that there may be more copies of the template instantiated through +// the system as different variants are used. + +#ifndef __shash_h__ +#define __shash_h__ + +// disable the "Conditional expression is constant" warning +#pragma warning(push) +#pragma warning(disable:4127) + + +enum FailureType { ftAllocation, ftOverflow }; + +// DefaultHashTraits provides defaults for seldomly customized values in traits classes. 
+ +template < typename ELEMENT, typename COUNT_T = UInt32 > +class DefaultSHashTraits +{ + public: + typedef COUNT_T count_t; + typedef ELEMENT element_t; + typedef DPTR(element_t) PTR_element_t; // by default SHash is DAC-aware. For RS + // only SHash use NonDacAwareSHashTraits + // (which typedefs element_t* PTR_element_t) + static const count_t s_growth_factor_numerator = 3; + static const count_t s_growth_factor_denominator = 2; + + static const count_t s_density_factor_numerator = 3; + static const count_t s_density_factor_denominator = 4; + + static const count_t s_minimum_allocation = 7; + + static const bool s_supports_remove = true; + + static const ELEMENT Null() { return (const ELEMENT) 0; } + static const ELEMENT Deleted() { return (const ELEMENT) -1; } + static bool IsNull(const ELEMENT &e) { return e == (const ELEMENT) 0; } + static bool IsDeleted(const ELEMENT &e) { return e == (const ELEMENT) -1; } + + static void OnFailure(FailureType /*ft*/) { } + + // No defaults - must specify: + // + // typedef key_t; + // static key_t GetKey(const element_t &i); + // static bool Equals(key_t k1, key_t k2); + // static count_t Hash(key_t k); +}; + +// Hash table class definition + +template +class SHash : public TRAITS +{ + private: + class Index; + friend class Index; + + class KeyIndex; + friend class KeyIndex; + class Iterator; + class KeyIterator; + + public: + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef typename TRAITS::element_t element_t; + typedef typename TRAITS::PTR_element_t PTR_element_t; + typedef typename TRAITS::key_t key_t; + typedef typename TRAITS::count_t count_t; + + // Constructor/destructor. SHash tables always start out empty, with no + // allocation overhead. Call Reallocate to prime with an initial size if + // desired. + + SHash(); + ~SHash(); + + // Lookup an element in the table by key. Returns NULL if no element in the table + // has the given key. Note that multiple entries for the same key may be stored - + // this will return the first element added. Use KeyIterator to find all elements + // with a given key. + + element_t Lookup(key_t key) const; + + // Pointer-based flavor of Lookup (allows efficient access to tables of structures) + + const element_t* LookupPtr(key_t key) const; + + // Add an element to the hash table. This will never replace an element; multiple + // elements may be stored with the same key. + // + // Returns 'true' on success, 'false' on failure. + + bool Add(const element_t &element); + + // Add a new element to the hash table, if no element with the same key is already + // there. Otherwise, it will replace the existing element. This has the effect of + // updating an element rather than adding a duplicate. + // + // Returns 'true' on success, 'false' on failure. + + bool AddOrReplace(const element_t & element); + + // Remove the first element matching the key from the hash table. + + void Remove(key_t key); + + // Remove the specific element. + + void Remove(Iterator& i); + void Remove(KeyIterator& i); + + // Pointer-based flavor of Remove (allows efficient access to tables of structures) + + void RemovePtr(element_t * element); + + // Remove all elements in the hashtable + + void RemoveAll(); + + // Begin and End pointers for iteration over entire table. + + Iterator Begin() const; + Iterator End() const; + + // Begin and End pointers for iteration over all elements with a given key. 
+ + KeyIterator Begin(key_t key) const; + KeyIterator End(key_t key) const; + + // Return the number of elements currently stored in the table + + count_t GetCount() const; + + // Return the number of elements that the table is capable storing currently + + count_t GetCapacity() const; + + // Reallocates a hash table to a specific size. The size must be big enough + // to hold all elements in the table appropriately. + // + // Note that the actual table size must always be a prime number; the number + // passed in will be upward adjusted if necessary. + // + // Returns 'true' on success, 'false' on failure. + + bool Reallocate(count_t newTableSize); + + // See if it is OK to grow the hash table by one element. If not, reallocate + // the hash table. + // + // Returns 'true' on success, 'false' on failure. + + bool CheckGrowth(); + + // See if it is OK to grow the hash table by N elementsone element. If not, reallocate + // the hash table. + + bool CheckGrowth(count_t newElements); + +private: + + // Resizes a hash table for growth. The new size is computed based + // on the current population, growth factor, and maximum density factor. + // + // Returns 'true' on success, 'false' on failure. + + bool Grow(); + + // Utility function to add a new element to the hash table. Note that + // it is perfectly find for the element to be a duplicate - if so it + // is added an additional time. Returns true if a new empty spot was used; + // false if an existing deleted slot. + + static bool Add(element_t *table, count_t tableSize, const element_t &element); + + // Utility function to add a new element to the hash table, if no element with the same key + // is already there. Otherwise, it will replace the existing element. This has the effect of + // updating an element rather than adding a duplicate. + + void AddOrReplace(element_t *table, count_t tableSize, const element_t &element); + + // Utility function to find the first element with the given key in + // the hash table. + + static const element_t* Lookup(PTR_element_t table, count_t tableSize, key_t key); + + // Utility function to remove the first element with the given key + // in the hash table. + + void Remove(element_t *table, count_t tableSize, key_t key); + + // Utility function to remove the specific element. + + void RemoveElement(element_t *table, count_t tableSize, element_t *element); + + // + // Enumerator, provides a template to produce an iterator on an existing class + // with a single iteration variable. + // + + template + class Enumerator + { + private: + const SUBTYPE *This() const + { + return (const SUBTYPE *) this; + } + + SUBTYPE *This() + { + return (SUBTYPE *)this; + } + + public: + + Enumerator() + { + } + + const element_t &operator*() const + { + return This()->Get(); + } + const element_t *operator->() const + { + return &(This()->Get()); + } + SUBTYPE &operator++() + { + This()->Next(); + return *This(); + } + SUBTYPE operator++(int) + { + SUBTYPE i = *This(); + This()->Next(); + return i; + } + bool operator==(const SUBTYPE &i) const + { + return This()->Equal(i); + } + bool operator!=(const SUBTYPE &i) const + { + return !This()->Equal(i); + } + }; + + // + // Index for whole table iterator. This is also the base for the keyed iterator. 
+ // + + class Index + { + friend class SHash; + friend class Iterator; + friend class Enumerator; + + // The methods implementation has to be here for portability + // Some compilers won't compile the separate implementation in shash.inl + protected: + + PTR_element_t m_table; + count_t m_tableSize; + count_t m_index; + + Index(const SHash *hash, bool begin) + : m_table(hash->m_table), + m_tableSize(hash->m_tableSize), + m_index(begin ? 0 : m_tableSize) + { + } + + const element_t &Get() const + { + return m_table[m_index]; + } + + void First() + { + if (m_index < m_tableSize) + if (TRAITS::IsNull(m_table[m_index]) || TRAITS::IsDeleted(m_table[m_index])) + Next(); + } + + void Next() + { + if (m_index >= m_tableSize) + return; + + for (;;) + { + m_index++; + if (m_index >= m_tableSize) + break; + if (!TRAITS::IsNull(m_table[m_index]) && !TRAITS::IsDeleted(m_table[m_index])) + break; + } + } + + bool Equal(const Index &i) const + { + return i.m_index == m_index; + } + }; + + class Iterator : public Index, public Enumerator + { + friend class SHash; + + public: + Iterator(const SHash *hash, bool begin) + : Index(hash, begin) + { + } + }; + + // + // Index for iterating elements with a given key. + // Note that the m_index field is artificially bumped to m_tableSize when the end + // of iteration is reached. This allows a canonical End iterator to be used. + // + + class KeyIndex : public Index + { + friend class SHash; + friend class KeyIterator; + friend class Enumerator; + + // The methods implementation has to be here for portability + // Some compilers won't compile the separate implementation in shash.inl + protected: + key_t m_key; + count_t m_increment; + + KeyIndex(const SHash *hash, bool begin) + : Index(hash, begin), + m_increment(0) + { + } + + void SetKey(key_t key) + { + if (m_tableSize > 0) + { + m_key = key; + count_t hash = Hash(key); + + TRAITS::m_index = hash % m_tableSize; + m_increment = (hash % (m_tableSize-1)) + 1; + + // Find first valid element + if (IsNull(m_table[TRAITS::m_index])) + TRAITS::m_index = m_tableSize; + else if (IsDeleted(m_table[TRAITS::m_index]) + || !Equals(m_key, GetKey(m_table[TRAITS::m_index]))) + Next(); + } + } + + void Next() + { + while (true) + { + TRAITS::m_index += m_increment; + if (TRAITS::m_index >= m_tableSize) + TRAITS::m_index -= m_tableSize; + + if (IsNull(m_table[TRAITS::m_index])) + { + TRAITS::m_index = m_tableSize; + break; + } + + if (!IsDeleted(m_table[TRAITS::m_index]) + && Equals(m_key, GetKey(m_table[TRAITS::m_index]))) + { + break; + } + } + } + }; + + class KeyIterator : public KeyIndex, public Enumerator + { + friend class SHash; + + public: + + operator Iterator &() + { + return *(Iterator*)this; + } + + operator const Iterator &() + { + return *(const Iterator*)this; + } + + KeyIterator(const SHash *hash, bool begin) + : KeyIndex(hash, begin) + { + } + }; + + // Test for prime number. + static bool IsPrime(count_t number); + + // Find the next prime number >= the given value. + + static count_t NextPrime(count_t number); + + // Instance members + + PTR_element_t m_table; // pointer to table + count_t m_tableSize; // allocated size of table + count_t m_tableCount; // number of elements in table + count_t m_tableOccupied; // number, includes deleted slots + count_t m_tableMax; // maximum occupied count before reallocating +}; + +// disables support for DAC marshaling. 
Useful for defining right-side only SHashes + +template +class NonDacAwareSHashTraits : public PARENT +{ +public: + typedef typename PARENT::element_t element_t; + typedef element_t * PTR_element_t; +}; + +// disables support for removing elements - produces slightly faster implementation + +template +class NoRemoveSHashTraits : public PARENT +{ +public: + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef typename PARENT::element_t element_t; + typedef typename PARENT::count_t count_t; + + static const bool s_supports_remove = false; + static const element_t Deleted() { UNREACHABLE(); } + static bool IsDeleted(const element_t &e) { UNREFERENCED_PARAMETER(e); return false; } +}; + +// PtrHashTraits is a template to provides useful defaults for pointer hash tables +// It relies on methods GetKey and Hash defined on ELEMENT + +template +class PtrSHashTraits : public DefaultSHashTraits +{ + public: + + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef DefaultSHashTraits PARENT; + typedef typename PARENT::element_t element_t; + typedef typename PARENT::count_t count_t; + + typedef KEY key_t; + + static key_t GetKey(const element_t &e) + { + return e->GetKey(); + } + static bool Equals(key_t k1, key_t k2) + { + return k1 == k2; + } + static count_t Hash(key_t k) + { + return ELEMENT::Hash(k); + } +}; + +template +class PtrSHash : public SHash< PtrSHashTraits > +{ +}; + +template +class KeyValuePair { + KEY key; + VALUE value; + +public: + KeyValuePair() + { + } + + KeyValuePair(const KEY& k, const VALUE& v) + : key(k), value(v) + { + } + + KEY const & Key() const + { + return key; + } + + VALUE const & Value() const + { + return value; + } +}; + +template +class MapSHashTraits : public DefaultSHashTraits< KeyValuePair > +{ +public: + // explicitly declare local typedefs for these traits types, otherwise + // the compiler may get confused + typedef typename DefaultSHashTraits< KeyValuePair >::element_t element_t; + typedef typename DefaultSHashTraits< KeyValuePair >::count_t count_t; + + typedef KEY key_t; + + static key_t GetKey(element_t e) + { + return e.Key(); + } + static bool Equals(key_t k1, key_t k2) + { + return k1 == k2; + } + static count_t Hash(key_t k) + { + return (count_t)(size_t)k; + } + + static const element_t Null() { return element_t((KEY)0,(VALUE)0); } + static bool IsNull(const element_t &e) { return e.Key() == (KEY)0; } +}; + +template +class MapSHash : public SHash< NoRemoveSHashTraits< MapSHashTraits > > +{ + typedef SHash< NoRemoveSHashTraits< MapSHashTraits > > PARENT; + +public: + void Add(KEY key, VALUE value) + { + PARENT::Add(KeyValuePair(key, value)); + } + + bool Lookup(KEY key, VALUE* pValue) + { + const KeyValuePair *pRet = PARENT::LookupPtr(key); + if (pRet == NULL) + return false; + + *pValue = pRet->Value(); + return true; + } +}; + + +// restore "Conditional expression is constant" warning to previous value +#pragma warning(pop) + +#endif // __shash_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/shash.inl b/src/coreclr/src/nativeaot/Runtime/shash.inl new file mode 100644 index 0000000000000..ab39f63955786 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/shash.inl @@ -0,0 +1,470 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
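// ---------------------------------------------------------------------------
// [Editorial aside, not part of the patch] A minimal sketch of how the traits
// contract declared in shash.h above is satisfied in practice. The element
// type and field names are illustrative; the sketch assumes shash.h/shash.inl
// and the runtime's common type definitions (UInt32) are included.
struct Widget
{
    UInt32 m_id;

    // PtrSHashTraits requires GetKey() on the element and a static Hash(key).
    UInt32 GetKey() const { return m_id; }
    static UInt32 Hash(UInt32 key) { return key * 2654435761u; }   // any stable mixing function
};

// Usage sketch:
//   PtrSHash<Widget, UInt32> table;
//   Widget w = { 42 };
//   table.Add(&w);                       // grows the prime-sized table as needed
//   Widget * found = table.Lookup(42);   // &w, or NULL when the key is absent
//
// MapSHash<KEY, VALUE> layers key/value pairs over the same machinery, with
// Add(key, value) and Lookup(key, &value).
// ---------------------------------------------------------------------------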
+ +// disable the "Conditional expression is constant" warning +#pragma warning(disable:4127) + + +template +SHash::SHash() + : m_table(nullptr), + m_tableSize(0), + m_tableCount(0), + m_tableOccupied(0), + m_tableMax(0) +{ + C_ASSERT(TRAITS::s_growth_factor_numerator > TRAITS::s_growth_factor_denominator); + C_ASSERT(TRAITS::s_density_factor_numerator < TRAITS::s_density_factor_denominator); +} + +template +SHash::~SHash() +{ + delete [] m_table; +} + +template +typename SHash::count_t SHash::GetCount() const +{ + return m_tableCount; +} + +template +typename SHash::count_t SHash::GetCapacity() const +{ + return m_tableMax; +} + +template +typename SHash< TRAITS>::element_t SHash::Lookup(key_t key) const +{ + const element_t *pRet = Lookup(m_table, m_tableSize, key); + return ((pRet != NULL) ? (*pRet) : TRAITS::Null()); +} + +template +const typename SHash< TRAITS>::element_t* SHash::LookupPtr(key_t key) const +{ + return Lookup(m_table, m_tableSize, key); +} + +template +bool SHash::Add(const element_t &element) +{ + if (!CheckGrowth()) + return false; + + if (Add(m_table, m_tableSize, element)) + m_tableOccupied++; + m_tableCount++; + + return true; +} + +template +bool SHash::AddOrReplace(const element_t &element) +{ + if (!CheckGrowth()) + return false; + + AddOrReplace(m_table, m_tableSize, element); + return true; +} + +template +void SHash::Remove(key_t key) +{ + Remove(m_table, m_tableSize, key); +} + +template +void SHash::Remove(Iterator& i) +{ + RemoveElement(m_table, m_tableSize, (element_t*)&(*i)); +} + +template +void SHash::Remove(KeyIterator& i) +{ + RemoveElement(m_table, m_tableSize, (element_t*)&(*i)); +} + +template +void SHash::RemovePtr(element_t * p) +{ + RemoveElement(m_table, m_tableSize, p); +} + +template +void SHash::RemoveAll() +{ + delete [] m_table; + + m_table = NULL; + m_tableSize = 0; + m_tableCount = 0; + m_tableOccupied = 0; + m_tableMax = 0; +} + +template +typename SHash::Iterator SHash::Begin() const +{ + Iterator i(this, true); + i.First(); + return i; +} + +template +typename SHash::Iterator SHash::End() const +{ + return Iterator(this, false); +} + +template +typename SHash::KeyIterator SHash::Begin(key_t key) const +{ + KeyIterator k(this, true); + k.SetKey(key); + return k; +} + +template +typename SHash::KeyIterator SHash::End(key_t key) const +{ + return KeyIterator(this, false); +} + +template +bool SHash::CheckGrowth() +{ + if (m_tableOccupied == m_tableMax) + { + return Grow(); + } + + return true; +} + +template +bool SHash::Grow() +{ + count_t newSize = (count_t) (m_tableCount + * TRAITS::s_growth_factor_numerator / TRAITS::s_growth_factor_denominator + * TRAITS::s_density_factor_denominator / TRAITS::s_density_factor_numerator); + if (newSize < TRAITS::s_minimum_allocation) + newSize = TRAITS::s_minimum_allocation; + + // handle potential overflow + if (newSize < m_tableCount) + { + TRAITS::OnFailure(ftOverflow); + return false; + } + + return Reallocate(newSize); +} + +template +bool SHash::CheckGrowth(count_t newElements) +{ + count_t newCount = (m_tableCount + newElements); + + // handle potential overflow + if (newCount < newElements) + { + TRAITS::OnFailure(ftOverflow); + return false; + } + + // enough space in the table? 
+ if (newCount < m_tableMax) + return true; + + count_t newSize = (count_t) (newCount * TRAITS::s_density_factor_denominator / TRAITS::s_density_factor_numerator) + 1; + + // handle potential overflow + if (newSize < newCount) + { + TRAITS::OnFailure(ftOverflow); + return false; + } + + // accelerate the growth to avoid unnecessary rehashing + count_t newSize2 = (m_tableCount * TRAITS::s_growth_factor_numerator / TRAITS::s_growth_factor_denominator + * TRAITS::s_density_factor_denominator / TRAITS::s_density_factor_numerator); + + if (newSize < newSize2) + newSize = newSize2; + + if (newSize < TRAITS::s_minimum_allocation) + newSize = TRAITS::s_minimum_allocation; + + return Reallocate(newSize); +} + +template +bool SHash::Reallocate(count_t newTableSize) +{ + ASSERT(newTableSize >= + (count_t) (GetCount() * TRAITS::s_density_factor_denominator / TRAITS::s_density_factor_numerator)); + + // Allocation size must be a prime number. This is necessary so that hashes uniformly + // distribute to all indices, and so that chaining will visit all indices in the hash table. + newTableSize = NextPrime(newTableSize); + if (newTableSize == 0) + { + TRAITS::OnFailure(ftOverflow); + return false; + } + + element_t *newTable = new (nothrow) element_t [newTableSize]; + if (newTable == NULL) + { + TRAITS::OnFailure(ftAllocation); + return false; + } + + element_t *p = newTable, *pEnd = newTable + newTableSize; + while (p < pEnd) + { + *p = TRAITS::Null(); + p++; + } + + // Move all entries over to new table. + + for (Iterator i = Begin(), end = End(); i != end; i++) + { + const element_t & cur = (*i); + if (!TRAITS::IsNull(cur) && !TRAITS::IsDeleted(cur)) + Add(newTable, newTableSize, cur); + } + + // @todo: + // We might want to try to delay this cleanup to allow asynchronous readers + + delete [] m_table; + + m_table = PTR_element_t(newTable); + m_tableSize = newTableSize; + m_tableMax = (count_t) (newTableSize * TRAITS::s_density_factor_numerator / TRAITS::s_density_factor_denominator); + m_tableOccupied = m_tableCount; + + return true; +} + +template +const typename SHash::element_t * SHash::Lookup(PTR_element_t table, count_t tableSize, key_t key) +{ + if (tableSize == 0) + return NULL; + + count_t hash = TRAITS::Hash(key); + count_t index = hash % tableSize; + count_t increment = 0; // delay computation + + while (true) + { + element_t& current = table[index]; + + if (TRAITS::IsNull(current)) + return NULL; + + if (!TRAITS::IsDeleted(current) + && TRAITS::Equals(key, TRAITS::GetKey(current))) + { + return ¤t; + } + + if (increment == 0) + increment = (hash % (tableSize-1)) + 1; + + index += increment; + if (index >= tableSize) + index -= tableSize; + } +} + +template +bool SHash::Add(element_t *table, count_t tableSize, const element_t &element) +{ + key_t key = TRAITS::GetKey(element); + + count_t hash = TRAITS::Hash(key); + count_t index = hash % tableSize; + count_t increment = 0; // delay computation + + while (true) + { + element_t& current = table[index]; + + if (TRAITS::IsNull(current)) + { + table[index] = element; + return true; + } + + if (TRAITS::IsDeleted(current)) + { + table[index] = element; + return false; + } + + if (increment == 0) + increment = (hash % (tableSize-1)) + 1; + + index += increment; + if (index >= tableSize) + index -= tableSize; + } +} + +template +void SHash::AddOrReplace(element_t *table, count_t tableSize, const element_t &element) +{ + ASSERT(!TRAITS::s_supports_remove); + + key_t key = TRAITS::GetKey(element); + + count_t hash = TRAITS::Hash(key); + count_t index 
= hash % tableSize; + count_t increment = 0; // delay computation + + while (true) + { + element_t& current = table[index]; + ASSERT(!TRAITS::IsDeleted(current)); + + if (TRAITS::IsNull(current)) + { + table[index] = element; + m_tableCount++; + m_tableOccupied++; + return; + } + else if (TRAITS::Equals(key, TRAITS::GetKey(current))) + { + table[index] = element; + return; + } + + if (increment == 0) + increment = (hash % (tableSize-1)) + 1; + + index += increment; + if (index >= tableSize) + index -= tableSize; + } +} + +#ifdef _MSC_VER +#pragma warning (disable: 4702) // Workaround bogus unreachable code warning +#endif +template +void SHash::Remove(element_t *table, count_t tableSize, key_t key) +{ + ASSERT(TRAITS::s_supports_remove); + ASSERT(Lookup(table, tableSize, key) != NULL); + + count_t hash = TRAITS::Hash(key); + count_t index = hash % tableSize; + count_t increment = 0; // delay computation + + while (true) + { + element_t& current = table[index]; + + if (TRAITS::IsNull(current)) + return; + + if (!TRAITS::IsDeleted(current) + && TRAITS::Equals(key, TRAITS::GetKey(current))) + { + table[index] = TRAITS::Deleted(); + m_tableCount--; + return; + } + + if (increment == 0) + increment = (hash % (tableSize-1)) + 1; + + index += increment; + if (index >= tableSize) + index -= tableSize; + } +} +#ifdef _MSC_VER +#pragma warning (default: 4702) +#endif + +template +void SHash::RemoveElement(element_t *table, count_t tableSize, element_t *element) +{ + ASSERT(TRAITS::s_supports_remove); + ASSERT(table <= element && element < table + tableSize); + ASSERT(!TRAITS::IsNull(*element) && !TRAITS::IsDeleted(*element)); + + *element = TRAITS::Deleted(); + m_tableCount--; +} + +template +bool SHash::IsPrime(count_t number) +{ + // This is a very low-tech check for primality, which doesn't scale very well. + // There are more efficient tests if this proves to be burdensome for larger + // tables. + + if ((number&1) == 0) + return false; + + count_t factor = 3; + while (factor * factor <= number) + { + if ((number % factor) == 0) + return false; + factor += 2; + } + + return true; +} + +namespace +{ + const UInt32 g_shash_primes[] = { + 11,17,23,29,37,47,59,71,89,107,131,163,197,239,293,353,431,521,631,761,919, + 1103,1327,1597,1931,2333,2801,3371,4049,4861,5839,7013,8419,10103,12143,14591, + 17519,21023,25229,30293,36353,43627,52361,62851,75431,90523, 108631, 130363, + 156437, 187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, + 968897, 1162687, 1395263, 1674319, 2009191, 2411033, 2893249, 3471899, 4166287, + 4999559, 5999471, 7199369 }; +} + + +// Returns a prime larger than 'number' or 0, in case of overflow +template +typename SHash::count_t SHash::NextPrime(typename SHash::count_t number) +{ + for (int i = 0; i < (int) (sizeof(g_shash_primes) / sizeof(g_shash_primes[0])); i++) + { + if (g_shash_primes[i] >= number) + return (typename SHash::count_t)(g_shash_primes[i]); + } + + if ((number&1) == 0) + number++; + + while (number != 1) + { + if (IsPrime(number)) + return number; + number += 2; + } + + return 0; +} + +// restore "Conditional expression is constant" warning to default value +#pragma warning(default:4127) + diff --git a/src/coreclr/src/nativeaot/Runtime/slist.h b/src/coreclr/src/nativeaot/Runtime/slist.h new file mode 100644 index 0000000000000..4525ba3b586ef --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/slist.h @@ -0,0 +1,124 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __slist_h__ +#define __slist_h__ + +#include "forward_declarations.h" + +MSVC_SAVE_WARNING_STATE() +MSVC_DISABLE_WARNING(4127) // conditional expression is constant -- it's intentionally constant + +struct DoNothingFailFastPolicy +{ + static inline void FailFast(); +}; + +template +struct DefaultSListTraits : public FailFastPolicy +{ + typedef DPTR(T) PTR_T; + typedef DPTR(PTR_T) PTR_PTR_T; + + static inline PTR_PTR_T GetNextPtr(PTR_T pT); + static inline bool Equals(PTR_T pA, PTR_T pB); +}; + +//------------------------------------------------------------------------------------------------------------ +// class SList, to use a singly linked list. +// +// To use, either expose a field DPTR(T) m_pNext by adding DefaultSListTraits as a friend class, or +// define a new Traits class derived from DefaultSListTraits and override the GetNextPtr function. +// +// SList supports lockless head insert and Remove methods. However, PushHeadInterlocked and +// PopHeadInterlocked must be used very carefully, as the rest of the mutating methods are not +// interlocked. In general, code must be careful to ensure that it will never use more than one +// synchronization mechanism at any given time to control access to a resource, and this is no +// exception. In particular, if synchronized access to other SList operations (such as FindAndRemove) +// are required, than a separate synchronization mechanism (such as a critical section) must be used. +//------------------------------------------------------------------------------------------------------------ +template > +class SList : public Traits +{ +protected: + typedef typename Traits::PTR_T PTR_T; + typedef typename Traits::PTR_PTR_T PTR_PTR_T; + +public: + SList(); + + // Returns true if there are no entries in the list. + bool IsEmpty(); + + // Returns the value of (but does not remove) the first element in the list. + PTR_T GetHead(); + + // Inserts pItem at the front of the list. See class header for more information. + void PushHead(PTR_T pItem); + void PushHeadInterlocked(PTR_T pItem); + + // Removes and returns the first entry in the list. See class header for more information. + PTR_T PopHead(); + + class Iterator + { + friend SList; + + public: + Iterator(Iterator const &it); + Iterator& operator=(Iterator const &it); + + PTR_T operator->(); + PTR_T operator*(); + + Iterator & operator++(); + Iterator operator++(int); + + bool operator==(Iterator const &rhs); + bool operator==(PTR_T pT); + bool operator!=(Iterator const &rhs); + + private: + Iterator(PTR_PTR_T ppItem); + + Iterator Insert(PTR_T pItem); + Iterator Remove(); + + static Iterator End(); + PTR_PTR_T m_ppCur; +#ifdef _DEBUG + mutable bool m_fIsValid; +#endif + + PTR_T _Value() const; + + enum e_ValidateOperation + { + e_CanCompare, // Will assert in debug if m_fIsValid == false. + e_CanInsert, // i.e., not the fake End() value of m_ppCur == NULL + e_HasValue, // i.e., m_ppCur != NULL && *m_ppCur != NULL + }; + void _Validate(e_ValidateOperation op) const; + }; + + Iterator Begin(); + Iterator End(); + + // Returns iterator to first list item matching pItem + Iterator FindFirst(PTR_T pItem); + bool RemoveFirst(PTR_T pItem); + + // Inserts pItem *before* it. Returns iterator pointing to inserted item. + Iterator Insert(Iterator & it, PTR_T pItem); + + // Removes item pointed to by it from the list. Returns iterator pointing + // to following item. 
+ Iterator Remove(Iterator & it); + +private: + PTR_T m_pHead; +}; + +MSVC_RESTORE_WARNING_STATE() + +#endif // __slist_h__ diff --git a/src/coreclr/src/nativeaot/Runtime/slist.inl b/src/coreclr/src/nativeaot/Runtime/slist.inl new file mode 100644 index 0000000000000..dc437fe1c9ba9 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/slist.inl @@ -0,0 +1,361 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +MSVC_SAVE_WARNING_STATE() +MSVC_DISABLE_WARNING(4127) // conditional expression is constant -- + // while (true) loops and compile time template constants cause this. + + +//------------------------------------------------------------------------------------------------- +namespace rh { namespace std +{ + // Specialize rh::std::find for SList iterators so that it will use _Traits::Equals. + template + inline + typename SList<_Tx, _Traits>::Iterator find( + typename SList<_Tx, _Traits>::Iterator _First, + typename SList<_Tx, _Traits>::Iterator _Last, + const _Ty& _Val) + { // find first matching _Val + for (; _First != _Last; ++_First) + if (_Traits::Equals(*_First, _Val)) + break; + return (_First); + } +} // namespace std +} // namespace rh + +//------------------------------------------------------------------------------------------------- +inline +void DoNothingFailFastPolicy::FailFast() +{ + // Intentionally a no-op. +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename DefaultSListTraits::PTR_PTR_T DefaultSListTraits::GetNextPtr( + PTR_T pT) +{ + ASSERT(pT != NULL); + return dac_cast(dac_cast(pT) + offsetof(T, m_pNext)); +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool DefaultSListTraits::Equals( + PTR_T pA, + PTR_T pB) +{ // Default is pointer comparison + return pA == pB; +} + +//------------------------------------------------------------------------------------------------- +template +inline +SList::SList() + : m_pHead(NULL) +{ +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool SList::IsEmpty() +{ + return Begin() == End(); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::PTR_T SList::GetHead() +{ + return m_pHead; +} + +//------------------------------------------------------------------------------------------------- +template +inline +void SList::PushHead( + PTR_T pItem) +{ + NO_DAC(); + Begin().Insert(pItem); +} + +//------------------------------------------------------------------------------------------------- +template +inline +void SList::PushHeadInterlocked( + PTR_T pItem) +{ + NO_DAC(); + ASSERT(pItem != NULL); + ASSERT(IS_ALIGNED(&m_pHead, sizeof(void*))); + + while (true) + { + *Traits::GetNextPtr(pItem) = *reinterpret_cast(&m_pHead); + if (PalInterlockedCompareExchangePointer( + reinterpret_cast(&m_pHead), + reinterpret_cast(pItem), + reinterpret_cast(*Traits::GetNextPtr(pItem))) == reinterpret_cast(*Traits::GetNextPtr(pItem))) + { + break; + } + } +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::PTR_T SList::PopHead() +{ + NO_DAC(); + PTR_T pRet = *Begin(); + Begin().Remove(); + return pRet; +} + 
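// ---------------------------------------------------------------------------
// [Editorial aside, not part of the patch] A minimal node type that works with
// DefaultSListTraits by exposing an m_pNext field, plus basic usage. Names are
// illustrative; the sketch assumes slist.h/slist.inl are included and a
// non-DAC build, where DPTR(WorkItem) is simply WorkItem*.
struct WorkItem
{
    WorkItem * m_pNext;   // located via offsetof(WorkItem, m_pNext) by the default traits
    int        m_payload;
};

// Usage sketch:
//   SList<WorkItem> list;
//   WorkItem a = { NULL, 1 };
//   WorkItem b = { NULL, 2 };
//   list.PushHead(&a);
//   list.PushHead(&b);
//   WorkItem * p = list.PopHead();   // &b -- head insertion gives LIFO order
//   bool empty = list.IsEmpty();     // false; 'a' is still linked
// ---------------------------------------------------------------------------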
+//------------------------------------------------------------------------------------------------- +template +inline +SList::Iterator::Iterator( + Iterator const &it) + : m_ppCur(it.m_ppCur) +#ifdef _DEBUG + , m_fIsValid(it.m_fIsValid) +#endif +{ +} + +//------------------------------------------------------------------------------------------------- +template +inline +SList::Iterator::Iterator( + PTR_PTR_T ppItem) + : m_ppCur(ppItem) +#ifdef _DEBUG + , m_fIsValid(true) +#endif +{ +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator& SList::Iterator::operator=( + Iterator const &it) +{ + m_ppCur = it.m_ppCur; +#ifdef _DEBUG + m_fIsValid = it.m_fIsValid; +#endif + return *this; +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::PTR_T SList::Iterator::operator->() +{ + _Validate(e_HasValue); + return _Value(); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::PTR_T SList::Iterator::operator*() +{ + _Validate(e_HasValue); + return _Value(); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator & SList::Iterator::operator++() +{ + _Validate(e_HasValue); // Having a value means we're not at the end. + m_ppCur = Traits::GetNextPtr(_Value()); + return *this; +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Iterator::operator++( + int) +{ + _Validate(e_HasValue); // Having a value means we're not at the end. 
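+    // Postfix form: remember the current position, advance *this via the prefix form, and
+    // return an iterator positioned where we were before the increment.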
+ PTR_PTR_T ppRet = m_ppCur; + ++(*this); + return Iterator(ppRet); +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool SList::Iterator::operator==( + Iterator const &rhs) +{ + _Validate(e_CanCompare); + rhs._Validate(e_CanCompare); + return Traits::Equals(_Value(), rhs._Value()); +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool SList::Iterator::operator==( + PTR_T pT) +{ + _Validate(e_CanCompare); + return Traits::Equals(_Value(), pT); +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool SList::Iterator::operator!=( + Iterator const &rhs) +{ + return !operator==(rhs); +} + +//------------------------------------------------------------------------------------------------- +template +inline /*static*/ +typename SList::Iterator SList::Iterator::End() +{ + return Iterator(NULL); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Iterator::Insert( + PTR_T pItem) +{ + NO_DAC(); + _Validate(e_CanInsert); + *Traits::GetNextPtr(pItem) = *m_ppCur; + *m_ppCur = pItem; + Iterator itRet(m_ppCur); + ++(*this); + return itRet; +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Iterator::Remove() +{ + NO_DAC(); + _Validate(e_HasValue); + *m_ppCur = *Traits::GetNextPtr(*m_ppCur); + PTR_PTR_T ppRet = m_ppCur; + // Set it to End, so that subsequent misuse of this iterator will + // result in an AV rather than possible memory corruption. + *this = End(); + return Iterator(ppRet); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::PTR_T SList::Iterator::_Value() const +{ + ASSERT(m_fIsValid); + return dac_cast(m_ppCur == NULL ? NULL : *m_ppCur); +} + +//------------------------------------------------------------------------------------------------- +template +inline +void SList::Iterator::_Validate(e_ValidateOperation op) const +{ + ASSERT(m_fIsValid); + ASSERT(op == e_CanCompare || op == e_CanInsert || op == e_HasValue); + + if ((op != e_CanCompare && m_ppCur == NULL) || + (op == e_HasValue && *m_ppCur == NULL)) + { + // NOTE: Default of DoNothingFailFastPolicy is a no-op, and so this function will be + // eliminated in retail builds. This is ok, as the subsequent operation will cause + // an AV, which will itself trigger a FailFast. Provide a different policy to get + // different behavior. 
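+        // (A policy whose FailFast() calls RhFailFast(), for example, would turn this misuse
+        // into an immediate fail-fast instead of relying on the subsequent access violation.)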
+ ASSERT_MSG(false, "Invalid SList::Iterator use."); + Traits::FailFast(); +#ifdef _DEBUG + m_fIsValid = false; +#endif + } +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Begin() +{ + typedef SList T_THIS; + return Iterator(dac_cast( + dac_cast(this) + offsetof(T_THIS, m_pHead))); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::End() +{ + return Iterator::End(); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::FindFirst(PTR_T pItem) +{ + return rh::std::find(Begin(), End(), pItem); +} + +//------------------------------------------------------------------------------------------------- +template +inline +bool SList::RemoveFirst(PTR_T pItem) +{ + NO_DAC(); + Iterator it = FindFirst(pItem); + if (it != End()) + { + it.Remove(); + return true; + } + else + { + return false; + } +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Insert(Iterator & it, PTR_T pItem) +{ + return it.Insert(pItem); +} + +//------------------------------------------------------------------------------------------------- +template +inline +typename SList::Iterator SList::Remove(Iterator & it) +{ + return it.Remove(); +} + + +MSVC_RESTORE_WARNING_STATE() + diff --git a/src/coreclr/src/nativeaot/Runtime/startup.cpp b/src/coreclr/src/nativeaot/Runtime/startup.cpp new file mode 100644 index 0000000000000..4ed81423af950 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/startup.cpp @@ -0,0 +1,463 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "event.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "RuntimeInstance.h" +#include "rhbinder.h" +#include "CachedInterfaceDispatch.h" +#include "RhConfig.h" +#include "stressLog.h" +#include "RestrictedCallouts.h" +#include "yieldprocessornormalized.h" + +#ifndef DACCESS_COMPILE + +#ifdef PROFILE_STARTUP +unsigned __int64 g_startupTimelineEvents[NUM_STARTUP_TIMELINE_EVENTS] = { 0 }; +#endif // PROFILE_STARTUP + +#ifdef TARGET_UNIX +Int32 RhpHardwareExceptionHandler(UIntNative faultCode, UIntNative faultAddress, PAL_LIMITED_CONTEXT* palContext, UIntNative* arg0Reg, UIntNative* arg1Reg); +#else +Int32 __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs); +#endif + +static void CheckForPalFallback(); +static bool DetectCPUFeatures(); + +extern RhConfig * g_pRhConfig; + +EXTERN_C bool g_fHasFastFxsave = false; + +CrstStatic g_CastCacheLock; +CrstStatic g_ThunkPoolLock; + +#if defined(HOST_X86) || defined(HOST_AMD64) +// This field is inspected from the generated code to determine what intrinsics are available. +EXTERN_C int g_cpuFeatures = 0; +// This field is defined in the generated code and sets the ISA expectations. 
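+// (DetectCPUFeatures below compares the detected g_cpuFeatures against this mask and fails
+// startup if a required ISA feature is not present on the machine.)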
+EXTERN_C int g_requiredCpuFeatures; +#endif + +static bool InitDLL(HANDLE hPalInstance) +{ + CheckForPalFallback(); + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + // + // Initialize interface dispatch. + // + if (!InitializeInterfaceDispatch()) + return false; +#endif + + // + // Initialize support for registering GC and HandleTable callouts. + // + if (!RestrictedCallouts::Initialize()) + return false; + + // + // Initialize RuntimeInstance state + // + if (!RuntimeInstance::Initialize(hPalInstance)) + return false; + + // Note: The global exception handler uses RuntimeInstance +#if !defined(APP_LOCAL_RUNTIME) && !defined(USE_PORTABLE_HELPERS) +#ifndef TARGET_UNIX + PalAddVectoredExceptionHandler(1, RhpVectoredExceptionHandler); +#else + PalSetHardwareExceptionHandler(RhpHardwareExceptionHandler); +#endif +#endif // !APP_LOCAL_RUNTIME && !USE_PORTABLE_HELPERS + + InitializeYieldProcessorNormalizedCrst(); + + STARTUP_TIMELINE_EVENT(NONGC_INIT_COMPLETE); + + if (!RedhawkGCInterface::InitializeSubsystems()) + return false; + + STARTUP_TIMELINE_EVENT(GC_INIT_COMPLETE); + +#ifdef STRESS_LOG + UInt32 dwTotalStressLogSize = g_pRhConfig->GetTotalStressLogSize(); + UInt32 dwStressLogLevel = g_pRhConfig->GetStressLogLevel(); + + unsigned facility = (unsigned)LF_ALL; + unsigned dwPerThreadChunks = (dwTotalStressLogSize / 24) / STRESSLOG_CHUNK_SIZE; + if (dwTotalStressLogSize != 0) + { + StressLog::Initialize(facility, dwStressLogLevel, + dwPerThreadChunks * STRESSLOG_CHUNK_SIZE, + (unsigned)dwTotalStressLogSize, hPalInstance); + } +#endif // STRESS_LOG + +#ifndef USE_PORTABLE_HELPERS + if (!DetectCPUFeatures()) + return false; +#endif + + if (!g_CastCacheLock.InitNoThrow(CrstType::CrstCastCache)) + return false; + + if (!g_ThunkPoolLock.InitNoThrow(CrstType::CrstCastCache)) + return false; + + return true; +} + +static void CheckForPalFallback() +{ +#ifdef _DEBUG + UInt32 disallowSetting = g_pRhConfig->GetDisallowRuntimeServicesFallback(); + if (disallowSetting == 0) + return; + + // The fallback provider doesn't implement write watch, so we check for the write watch capability as a + // proxy for whether or not we're using the fallback provider since we don't have direct access to this + // information from here. + + if (disallowSetting == 1) + { + // If RH_DisallowRuntimeServicesFallback is set to 1, we want to fail fast if we discover that we're + // running against the fallback provider. + if (!PalHasCapability(WriteWatchCapability)) + RhFailFast(); + } + else if (disallowSetting == 2) + { + // If RH_DisallowRuntimeServicesFallback is set to 2, we want to fail fast if we discover that we're + // NOT running against the fallback provider. 
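+        // (Write watch being available implies the real platform services, not the fallback.)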
+ if (PalHasCapability(WriteWatchCapability)) + RhFailFast(); + } +#endif // _DEBUG +} + +#ifndef USE_PORTABLE_HELPERS +// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.cs +enum XArchIntrinsicConstants +{ + XArchIntrinsicConstants_Aes = 0x0001, + XArchIntrinsicConstants_Pclmulqdq = 0x0002, + XArchIntrinsicConstants_Sse3 = 0x0004, + XArchIntrinsicConstants_Ssse3 = 0x0008, + XArchIntrinsicConstants_Sse41 = 0x0010, + XArchIntrinsicConstants_Sse42 = 0x0020, + XArchIntrinsicConstants_Popcnt = 0x0040, + XArchIntrinsicConstants_Avx = 0x0080, + XArchIntrinsicConstants_Fma = 0x0100, + XArchIntrinsicConstants_Avx2 = 0x0200, + XArchIntrinsicConstants_Bmi1 = 0x0400, + XArchIntrinsicConstants_Bmi2 = 0x0800, + XArchIntrinsicConstants_Lzcnt = 0x1000, +}; + +bool DetectCPUFeatures() +{ +#if defined(HOST_X86) || defined(HOST_AMD64) + + unsigned char buffer[16]; + +#ifdef HOST_AMD64 + // AMD has a "fast" mode for fxsave/fxrstor, which omits the saving of xmm registers. The OS will enable this mode + // if it is supported. So if we continue to use fxsave/fxrstor, we must manually save/restore the xmm registers. + // fxsr_opt is bit 25 of EDX + getextcpuid(0, 0x80000001, buffer); + if ((buffer[15] & 0x02) != 0) + g_fHasFastFxsave = true; +#endif + + uint32_t maxCpuId = getcpuid(0, buffer); + + if (maxCpuId >= 1) + { + // getcpuid executes cpuid with eax set to its first argument, and ecx cleared. + // It returns the resulting eax in buffer[0-3], ebx in buffer[4-7], ecx in buffer[8-11], + // and edx in buffer[12-15]. + + (void)getcpuid(1, buffer); + + // If SSE/SSE2 is not enabled, there is no point in checking the rest. + // SSE is bit 25 of EDX (buffer[15] & 0x02) + // SSE2 is bit 26 of EDX (buffer[15] & 0x04) + if ((buffer[15] & 0x06) == 0x06) // SSE & SSE2 + { + if ((buffer[11] & 0x02) != 0) // AESNI + { + g_cpuFeatures |= XArchIntrinsicConstants_Aes; + } + + if ((buffer[8] & 0x02) != 0) // PCLMULQDQ + { + g_cpuFeatures |= XArchIntrinsicConstants_Pclmulqdq; + } + + if ((buffer[8] & 0x01) != 0) // SSE3 + { + g_cpuFeatures |= XArchIntrinsicConstants_Sse3; + + if ((buffer[9] & 0x02) != 0) // SSSE3 + { + g_cpuFeatures |= XArchIntrinsicConstants_Ssse3; + + if ((buffer[10] & 0x08) != 0) // SSE4.1 + { + g_cpuFeatures |= XArchIntrinsicConstants_Sse41; + + if ((buffer[10] & 0x10) != 0) // SSE4.2 + { + g_cpuFeatures |= XArchIntrinsicConstants_Sse42; + + if ((buffer[10] & 0x80) != 0) // POPCNT + { + g_cpuFeatures |= XArchIntrinsicConstants_Popcnt; + } + + if ((buffer[11] & 0x18) == 0x18) // AVX & OSXSAVE + { + if (PalIsAvxEnabled() && (xmmYmmStateSupport() == 1)) + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx; + + if ((buffer[9] & 0x10) != 0) // FMA + { + g_cpuFeatures |= XArchIntrinsicConstants_Fma; + } + + if (maxCpuId >= 0x07) + { + (void)getextcpuid(0, 0x07, buffer); + + if ((buffer[4] & 0x20) != 0) // AVX2 + { + g_cpuFeatures |= XArchIntrinsicConstants_Avx2; + } + } + } + } + } + } + } + } + } + + if (maxCpuId >= 0x07) + { + (void)getextcpuid(0, 0x07, buffer); + + if ((buffer[4] & 0x08) != 0) // BMI1 + { + g_cpuFeatures |= XArchIntrinsicConstants_Bmi1; + } + + if ((buffer[5] & 0x01) != 0) // BMI2 + { + g_cpuFeatures |= XArchIntrinsicConstants_Bmi2; + } + } + } + + uint32_t maxCpuIdEx = getcpuid(0x80000000, buffer); + + if (maxCpuIdEx >= 0x80000001) + { + // getcpuid executes cpuid with eax set to its first argument, and ecx cleared. + // It returns the resulting eax in buffer[0-3], ebx in buffer[4-7], ecx in buffer[8-11], + // and edx in buffer[12-15]. 
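+        // Extended leaf 0x80000001: LZCNT support is reported in ECX bit 5 (buffer[8] & 0x20).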
+ + (void)getcpuid(0x80000001, buffer); + + if ((buffer[8] & 0x20) != 0) // LZCNT + { + g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt; + } + } + + if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures) + { + return false; + } +#endif // HOST_X86 || HOST_AMD64 + + return true; +} +#endif // !USE_PORTABLE_HELPERS + +#ifdef PROFILE_STARTUP +#define STD_OUTPUT_HANDLE ((UInt32)-11) + +struct RegisterModuleTrace +{ + LARGE_INTEGER Begin; + LARGE_INTEGER End; +}; + +const int NUM_REGISTER_MODULE_TRACES = 16; +int g_registerModuleCount = 0; + +RegisterModuleTrace g_registerModuleTraces[NUM_REGISTER_MODULE_TRACES] = { 0 }; + +static void AppendInt64(char * pBuffer, UInt32* pLen, UInt64 value) +{ + char localBuffer[20]; + int cch = 0; + + do + { + localBuffer[cch++] = '0' + (value % 10); + value = value / 10; + } while (value); + + for (int i = 0; i < cch; i++) + { + pBuffer[(*pLen)++] = localBuffer[cch - i - 1]; + } + + pBuffer[(*pLen)++] = ','; + pBuffer[(*pLen)++] = ' '; +} +#endif // PROFILE_STARTUP + +static void UninitDLL() +{ +#ifdef PROFILE_STARTUP + char buffer[1024]; + + UInt32 len = 0; + + AppendInt64(buffer, &len, g_startupTimelineEvents[PROCESS_ATTACH_BEGIN]); + AppendInt64(buffer, &len, g_startupTimelineEvents[NONGC_INIT_COMPLETE]); + AppendInt64(buffer, &len, g_startupTimelineEvents[GC_INIT_COMPLETE]); + AppendInt64(buffer, &len, g_startupTimelineEvents[PROCESS_ATTACH_COMPLETE]); + + for (int i = 0; i < g_registerModuleCount; i++) + { + AppendInt64(buffer, &len, g_registerModuleTraces[i].Begin.QuadPart); + AppendInt64(buffer, &len, g_registerModuleTraces[i].End.QuadPart); + } + + buffer[len++] = '\n'; + + fwrite(buffer, len, 1, stdout); +#endif // PROFILE_STARTUP +} + +volatile bool g_processShutdownHasStarted = false; + +static void DllThreadDetach() +{ + // BEWARE: loader lock is held here! + + // Should have already received a call to FiberDetach for this thread's "home" fiber. + Thread* pCurrentThread = ThreadStore::GetCurrentThreadIfAvailable(); + if (pCurrentThread != NULL && !pCurrentThread->IsDetached()) + { + // Once shutdown starts, RuntimeThreadShutdown callbacks are ignored, implying that + // it is no longer guaranteed that exiting threads will be detached. + if (!g_processShutdownHasStarted) + { + ASSERT_UNCONDITIONALLY("Detaching thread whose home fiber has not been detached"); + RhFailFast(); + } + } +} + +void RuntimeThreadShutdown(void* thread) +{ + // Note: loader lock is normally *not* held here! + // The one exception is that the loader lock may be held during the thread shutdown callback + // that is made for the single thread that runs the final stages of orderly process + // shutdown (i.e., the thread that delivers the DLL_PROCESS_DETACH notifications when the + // process is being torn down via an ExitProcess call). + + UNREFERENCED_PARAMETER(thread); + + ASSERT((Thread*)thread == ThreadStore::GetCurrentThread()); + + if (!g_processShutdownHasStarted) + { + ThreadStore::DetachCurrentThread(); + } +} + +extern "C" bool RhInitialize() +{ + if (!PalInit()) + return false; + + if (!InitDLL(PalGetModuleHandleFromPointer((void*)&RhInitialize))) + return false; + + return true; +} + +COOP_PINVOKE_HELPER(void, RhpEnableConservativeStackReporting, ()) +{ + GetRuntimeInstance()->EnableConservativeStackReporting(); +} + +// +// Currently called only from a managed executable once Main returns, this routine does whatever is needed to +// cleanup managed state before exiting. 
There's not a lot here at the moment since we're always about to let +// the OS tear the process down anyway. +// +// @TODO: Eventually we'll probably have a hosting API and explicit shutdown request. When that happens we'll +// something more sophisticated here since we won't be able to rely on the OS cleaning up after us. +// +COOP_PINVOKE_HELPER(void, RhpShutdown, ()) +{ + // Indicate that runtime shutdown is complete and that the caller is about to start shutting down the entire process. + g_processShutdownHasStarted = true; +} + +#ifdef _WIN32 +EXTERN_C UInt32_BOOL WINAPI RtuDllMain(HANDLE hPalInstance, UInt32 dwReason, void* /*pvReserved*/) +{ + switch (dwReason) + { + case DLL_PROCESS_ATTACH: + { + STARTUP_TIMELINE_EVENT(PROCESS_ATTACH_BEGIN); + + if (!InitDLL(hPalInstance)) + return FALSE; + + STARTUP_TIMELINE_EVENT(PROCESS_ATTACH_COMPLETE); + } + break; + + case DLL_PROCESS_DETACH: + UninitDLL(); + break; + + case DLL_THREAD_DETACH: + DllThreadDetach(); + break; + } + + return TRUE; +} +#endif // _WIN32 + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/stressLog.cpp b/src/coreclr/src/nativeaot/Runtime/stressLog.cpp new file mode 100644 index 0000000000000..9c1d2cd33ad10 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/stressLog.cpp @@ -0,0 +1,585 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// --------------------------------------------------------------------------- +// StressLog.cpp +// +// StressLog infrastructure +// --------------------------------------------------------------------------- + +#include "common.h" +#ifdef DACCESS_COMPILE +#include +#include "sospriv.h" +#endif // DACCESS_COMPILE +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "daccess.h" +#include "stressLog.h" +#include "holder.h" +#include "Crst.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "RWLock.h" +#include "event.h" +#include "threadstore.h" +#include "threadstore.inl" + +template inline T VolatileLoad(T const * pt) { return *(T volatile const *)pt; } +template inline void VolatileStore(T* pt, T val) { *(T volatile *)pt = val; } + +#ifdef STRESS_LOG + +typedef DPTR(StressLog) PTR_StressLog; +GPTR_IMPL(StressLog, g_pStressLog /*, &StressLog::theLog*/); + +#ifndef DACCESS_COMPILE + +/*********************************************************************************/ +#if defined(HOST_X86) + +/* This is like QueryPerformanceCounter but a lot faster. On machines with + variable-speed CPUs (for power management), this is not accurate, but may + be good enough. +*/ +inline __declspec(naked) unsigned __int64 getTimeStamp() { + + __asm { + RDTSC // read time stamp counter + ret + }; +} + +#else // HOST_X86 +unsigned __int64 getTimeStamp() { + + LARGE_INTEGER ret; + ZeroMemory(&ret, sizeof(LARGE_INTEGER)); + + PalQueryPerformanceCounter(&ret); + + return ret.QuadPart; +} + +#endif // HOST_X86 else + +/*********************************************************************************/ +/* Get the the frequency corresponding to 'getTimeStamp'. For non-x86 + architectures, this is just the performance counter frequency. 
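+   On x86, getTimeStamp reads the time stamp counter directly, so this frequency is only an
+   approximation of the actual tick rate (see the note above getTimeStamp).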
+*/ +unsigned __int64 getTickFrequency() +{ + LARGE_INTEGER ret; + ZeroMemory(&ret, sizeof(LARGE_INTEGER)); + PalQueryPerformanceFrequency(&ret); + return ret.QuadPart; +} + +#endif // DACCESS_COMPILE + +StressLog StressLog::theLog = { 0, 0, 0, 0, 0, 0 }; +const static unsigned __int64 RECYCLE_AGE = 0x40000000L; // after a billion cycles, we can discard old threads + +/*********************************************************************************/ + +#ifndef DACCESS_COMPILE + +void StressLog::Initialize(unsigned facilities, unsigned level, unsigned maxBytesPerThread, + unsigned maxBytesTotal, HANDLE hMod) +{ + if (theLog.MaxSizePerThread != 0) + { + // guard ourself against multiple initialization. First init wins. + return; + } + + g_pStressLog = &theLog; + + theLog.pLock = new (nothrow) CrstStatic(); + theLog.pLock->Init(CrstStressLog); + if (maxBytesPerThread < STRESSLOG_CHUNK_SIZE) + { + maxBytesPerThread = STRESSLOG_CHUNK_SIZE; + } + theLog.MaxSizePerThread = maxBytesPerThread; + + if (maxBytesTotal < STRESSLOG_CHUNK_SIZE * 256) + { + maxBytesTotal = STRESSLOG_CHUNK_SIZE * 256; + } + theLog.MaxSizeTotal = maxBytesTotal; + theLog.totalChunk = 0; + theLog.facilitiesToLog = facilities | LF_ALWAYS; + theLog.levelToLog = level; + theLog.deadCount = 0; + + theLog.tickFrequency = getTickFrequency(); + + PalGetSystemTimeAsFileTime (&theLog.startTime); + theLog.startTimeStamp = getTimeStamp(); + + theLog.moduleOffset = (size_t)hMod; // HMODULES are base addresses. +} + +/*********************************************************************************/ +/* create a new thread stress log buffer associated with pThread */ + +ThreadStressLog* StressLog::CreateThreadStressLog(Thread * pThread) { + + if (theLog.facilitiesToLog == 0) + return NULL; + + if (pThread == NULL) + pThread = ThreadStore::GetCurrentThread(); + + ThreadStressLog* msgs = reinterpret_cast(pThread->GetThreadStressLog()); + if (msgs != NULL) + { + return msgs; + } + + // if it looks like we won't be allowed to allocate a new chunk, exit early + if (VolatileLoad(&theLog.deadCount) == 0 && !AllowNewChunk (0)) + { + return NULL; + } + + CrstHolder holder(theLog.pLock); + + msgs = CreateThreadStressLogHelper(pThread); + + return msgs; +} + +ThreadStressLog* StressLog::CreateThreadStressLogHelper(Thread * pThread) { + + bool skipInsert = FALSE; + ThreadStressLog* msgs = NULL; + + // See if we can recycle a dead thread + if (VolatileLoad(&theLog.deadCount) > 0) + { + unsigned __int64 recycleStamp = getTimeStamp() - RECYCLE_AGE; + msgs = VolatileLoad(&theLog.logs); + //find out oldest dead ThreadStressLog in case we can't find one within + //recycle age but can't create a new chunk + ThreadStressLog * oldestDeadMsg = NULL; + + while(msgs != 0) + { + if (msgs->isDead) + { + bool hasTimeStamp = msgs->curPtr != (StressMsg *)msgs->chunkListTail->EndPtr(); + if (hasTimeStamp && msgs->curPtr->timeStamp < recycleStamp) + { + skipInsert = TRUE; + PalInterlockedDecrement(&theLog.deadCount); + break; + } + + if (!oldestDeadMsg) + { + oldestDeadMsg = msgs; + } + else if (hasTimeStamp && oldestDeadMsg->curPtr->timeStamp > msgs->curPtr->timeStamp) + { + oldestDeadMsg = msgs; + } + } + + msgs = msgs->next; + } + + //if the total stress log size limit is already passed and we can't add new chunk, + //always reuse the oldest dead msg + if (!AllowNewChunk (0) && !msgs) + { + msgs = oldestDeadMsg; + skipInsert = TRUE; + PalInterlockedDecrement(&theLog.deadCount); + } + } + + if (msgs == 0) { + msgs = new (nothrow) ThreadStressLog(); + + if (msgs == 0 
||!msgs->IsValid ()) + { + delete msgs; + msgs = 0; + goto LEAVE; + } + } + + msgs->Activate (pThread); + + if (!skipInsert) { +#ifdef _DEBUG + ThreadStressLog* walk = VolatileLoad(&theLog.logs); + while (walk) + { + _ASSERTE (walk != msgs); + walk = walk->next; + } +#endif + // Put it into the stress log + msgs->next = VolatileLoad(&theLog.logs); + VolatileStore(&theLog.logs, msgs); + } + +LEAVE: + ; + return msgs; +} + +/*********************************************************************************/ +/* static */ +void StressLog::ThreadDetach(ThreadStressLog *msgs) { + + if (msgs == 0) + { + return; + } + + // We should write this message to the StressLog for deleted fiber. + msgs->LogMsg (LF_STARTUP, 0, "******* DllMain THREAD_DETACH called Thread dying *******\n"); + + msgs->isDead = TRUE; + PalInterlockedIncrement(&theLog.deadCount); +} + +bool StressLog::AllowNewChunk (long numChunksInCurThread) +{ + _ASSERTE (numChunksInCurThread <= VolatileLoad(&theLog.totalChunk)); + UInt32 perThreadLimit = theLog.MaxSizePerThread; + + if (numChunksInCurThread == 0 /*&& IsSuspendEEThread()*/) + return TRUE; + + if (ThreadStore::GetCurrentThread()->IsGCSpecial()) + { + perThreadLimit *= GC_STRESSLOG_MULTIPLY; + } + + if ((UInt32)numChunksInCurThread * STRESSLOG_CHUNK_SIZE >= perThreadLimit) + { + return FALSE; + } + + return (UInt32)VolatileLoad(&theLog.totalChunk) * STRESSLOG_CHUNK_SIZE < theLog.MaxSizeTotal; +} + +bool StressLog::ReserveStressLogChunks (unsigned chunksToReserve) +{ + Thread *pThread = ThreadStore::GetCurrentThread(); + ThreadStressLog* msgs = reinterpret_cast(pThread->GetThreadStressLog()); + if (msgs == 0) + { + msgs = CreateThreadStressLog(pThread); + + if (msgs == 0) + return FALSE; + } + + if (chunksToReserve == 0) + { + chunksToReserve = (theLog.MaxSizePerThread + STRESSLOG_CHUNK_SIZE - 1) / STRESSLOG_CHUNK_SIZE; + } + + long numTries = (long)chunksToReserve - msgs->chunkListLength; + for (long i = 0; i < numTries; i++) + { + msgs->GrowChunkList (); + } + + return msgs->chunkListLength >= (long)chunksToReserve; +} + +/*********************************************************************************/ +/* fetch a buffer that can be used to write a stress message, it is thread safe */ + +void ThreadStressLog::LogMsg ( UInt32 facility, int cArgs, const char* format, va_list Args) +{ + + // Asserts in this function cause infinite loops in the asserting mechanism. + // Just use debug breaks instead. + + ASSERT( cArgs >= 0 && cArgs <= StressMsg::maxArgCnt ); + + size_t offs = ((size_t)format - StressLog::theLog.moduleOffset); + + ASSERT(offs < StressMsg::maxOffset); + if (offs >= StressMsg::maxOffset) + { + // Set it to this string instead. 
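+        // (The format string is too far from the module base to be encoded, so record a
+        // harmless placeholder instead of an out-of-range offset.)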
+ offs = +#ifdef _DEBUG + (size_t)""; +#else // _DEBUG + 0; // a 0 offset is ignored by StressLog::Dump +#endif // _DEBUG else + } + + // Get next available slot + StressMsg* msg = AdvanceWrite(cArgs); + + msg->timeStamp = getTimeStamp(); + msg->facility = facility; + msg->formatOffset = offs; + msg->numberOfArgs = cArgs; + + for ( int i = 0; i < cArgs; ++i ) + { + void* data = va_arg(Args, void*); + msg->args[i] = data; + } + + ASSERT(IsValid() && threadId == PalGetCurrentThreadIdForLogging()); +} + + +void ThreadStressLog::Activate (Thread * pThread) +{ + _ASSERTE(pThread != NULL); + //there is no need to zero buffers because we could handle garbage contents + threadId = PalGetCurrentThreadIdForLogging(); + isDead = FALSE; + curWriteChunk = chunkListTail; + curPtr = (StressMsg *)curWriteChunk->EndPtr (); + writeHasWrapped = FALSE; + this->pThread = pThread; + ASSERT(pThread->IsCurrentThread()); +} + +/* static */ +void StressLog::LogMsg (unsigned facility, int cArgs, const char* format, ... ) +{ + _ASSERTE ( cArgs >= 0 && cArgs <= StressMsg::maxArgCnt ); + + va_list Args; + va_start(Args, format); + + Thread *pThread = ThreadStore::GetCurrentThread(); + if (pThread == NULL) + return; + + ThreadStressLog* msgs = reinterpret_cast(pThread->GetThreadStressLog()); + + if (msgs == 0) { + msgs = CreateThreadStressLog(pThread); + + if (msgs == 0) + return; + } + msgs->LogMsg (facility, cArgs, format, Args); +} + +#ifdef _DEBUG + +/* static */ +void StressLog::LogCallStack(const char *const callTag){ + + size_t CallStackTrace[MAX_CALL_STACK_TRACE]; + UInt32 hash; + unsigned short stackTraceCount = PalCaptureStackBackTrace (2, MAX_CALL_STACK_TRACE, (void**)CallStackTrace, &hash); + if (stackTraceCount > MAX_CALL_STACK_TRACE) + stackTraceCount = MAX_CALL_STACK_TRACE; + LogMsgOL("Start of %s stack \n", callTag); + unsigned short i = 0; + for (;i < stackTraceCount; i++) + { + LogMsgOL("(%s stack)%pK\n", callTag, CallStackTrace[i]); + } + LogMsgOL("End of %s stack\n", callTag); +} + +#endif //_DEBUG + +#else // DACCESS_COMPILE + +bool StressLog::Initialize() +{ + ThreadStressLog* logs = 0; + + ThreadStressLog* curThreadStressLog = this->logs; + unsigned __int64 lastTimeStamp = 0; // timestamp of last log entry + while(curThreadStressLog != 0) + { + if (!curThreadStressLog->IsReadyForRead()) + { + if (curThreadStressLog->origCurPtr == NULL) + curThreadStressLog->origCurPtr = curThreadStressLog->curPtr; + + // avoid repeated calls into this function + StressLogChunk * head = curThreadStressLog->chunkListHead; + StressLogChunk * curChunk = head; + bool curPtrInitialized = false; + do + { + if (!curChunk->IsValid ()) + { + // TODO: Report corrupt chunk PTR_HOST_TO_TADDR(curChunk) + } + + if (!curPtrInitialized && curChunk == curThreadStressLog->curWriteChunk) + { + // adjust curPtr to the debugger's address space + curThreadStressLog->curPtr = (StressMsg *)((UInt8 *)curChunk + ((UInt8 *)curThreadStressLog->curPtr - (UInt8 *)PTR_HOST_TO_TADDR(curChunk))); + curPtrInitialized = true; + } + + curChunk = curChunk->next; + } while (curChunk != head); + + if (!curPtrInitialized) + { + delete curThreadStressLog; + return false; + } + + // adjust readPtr and curPtr if needed + curThreadStressLog->Activate (NULL); + } + curThreadStressLog = curThreadStressLog->next; + } + return true; +} + +void StressLog::ResetForRead() +{ + ThreadStressLog* curThreadStressLog = this->logs; + while(curThreadStressLog != 0) + { + curThreadStressLog->readPtr = NULL; + curThreadStressLog->curPtr = curThreadStressLog->origCurPtr; + 
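+        // curPtr has been restored from origCurPtr so this log can be dumped again;
+        // move on to the next per-thread log.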
curThreadStressLog = curThreadStressLog->next; + } +} + +// Initialization of the ThreadStressLog when dumping the log +inline void ThreadStressLog::Activate (Thread * /*pThread*/) +{ + // avoid repeated calls into this function + if (IsReadyForRead()) + return; + + curReadChunk = curWriteChunk; + readPtr = curPtr; + readHasWrapped = false; + // the last written log, if it wrapped around may have partially overwritten + // a previous record. Update curPtr to reflect the last safe beginning of a record, + // but curPtr shouldn't wrap around, otherwise it'll break our assumptions about stress + // log + curPtr = (StressMsg*)((char*)curPtr - StressMsg::maxMsgSize()); + if (curPtr < (StressMsg*)curWriteChunk->StartPtr()) + { + curPtr = (StressMsg *)curWriteChunk->StartPtr(); + } + // corner case: the log is empty + if (readPtr == (StressMsg *)curReadChunk->EndPtr ()) + { + AdvReadPastBoundary(); + } +} + +ThreadStressLog* StressLog::FindLatestThreadLog() const +{ + const ThreadStressLog* latestLog = 0; + for (const ThreadStressLog* ptr = this->logs; ptr != NULL; ptr = ptr->next) + { + if (ptr->readPtr != NULL) + if (latestLog == 0 || ptr->readPtr->timeStamp > latestLog->readPtr->timeStamp) + latestLog = ptr; + } + return const_cast(latestLog); +} + +// Can't refer to the types in sospriv.h because it drags in windows.h +void StressLog::EnumerateStressMsgs(/*STRESSMSGCALLBACK*/void* smcbWrapper, /*ENDTHREADLOGCALLBACK*/void* etcbWrapper, void *token) +{ + STRESSMSGCALLBACK smcb = (STRESSMSGCALLBACK)smcbWrapper; + ENDTHREADLOGCALLBACK etcb = (ENDTHREADLOGCALLBACK) etcbWrapper; + void *argsCopy[StressMsg::maxArgCnt]; + + for (;;) + { + ThreadStressLog* latestLog = this->FindLatestThreadLog(); + + if (latestLog == 0) + { + break; + } + StressMsg* latestMsg = latestLog->readPtr; + if (latestMsg->formatOffset != 0 && !latestLog->CompletedDump()) + { + char format[256]; + TADDR taFmt = (latestMsg->formatOffset) + (TADDR)(this->moduleOffset); + HRESULT hr = DacReadAll(taFmt, format, _countof(format), false); + if (hr != S_OK) + strcpy_s(format, _countof(format), "Could not read address of format string"); + + double deltaTime = ((double) (latestMsg->timeStamp - this->startTimeStamp)) / this->tickFrequency; + + // Pass a copy of the args to the callback to avoid foreign code overwriting the stress log + // entries (this was the case for %s arguments) + memcpy_s(argsCopy, sizeof(argsCopy), latestMsg->args, (latestMsg->numberOfArgs)*sizeof(void*)); + + // @TODO: CORERT: Truncating threadId to 32-bit + if (!smcb((UINT32)latestLog->threadId, deltaTime, latestMsg->facility, format, argsCopy, token)) + break; + } + + latestLog->readPtr = latestLog->AdvanceRead(); + if (latestLog->CompletedDump()) + { + latestLog->readPtr = NULL; + + // @TODO: CORERT: Truncating threadId to 32-bit + if (!etcb((UINT32)latestLog->threadId, token)) + break; + } + } +} + +typedef DPTR(SIZE_T) PTR_SIZE_T; + +// Can't refer to the types in sospriv.h because it drags in windows.h +void StressLog::EnumStressLogMemRanges(/*STRESSLOGMEMRANGECALLBACK*/void* slmrcbWrapper, void *token) +{ + STRESSLOGMEMRANGECALLBACK slmrcb = (STRESSLOGMEMRANGECALLBACK)slmrcbWrapper; + + // we go to extreme lengths to ensure we don't read in the whole memory representation + // of the stress log, but only the ranges... 
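+    // (Each per-thread log owns a circular list of StressLogChunk blocks; only those
+    //  chunk-sized ranges are handed to the callback.)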
+ // + + size_t ThreadStressLogAddr = *dac_cast(PTR_HOST_MEMBER_TADDR(StressLog, this, logs)); + while (ThreadStressLogAddr != NULL) + { + size_t ChunkListHeadAddr = *dac_cast(ThreadStressLogAddr + offsetof(ThreadStressLog, chunkListHead)); + size_t StressLogChunkAddr = ChunkListHeadAddr; + + do + { + slmrcb(StressLogChunkAddr, sizeof (StressLogChunk), token); + StressLogChunkAddr = *dac_cast(StressLogChunkAddr + offsetof (StressLogChunk, next)); + if (StressLogChunkAddr == NULL) + { + return; + } + } while (StressLogChunkAddr != ChunkListHeadAddr); + + ThreadStressLogAddr = *dac_cast(ThreadStressLogAddr + offsetof(ThreadStressLog, next)); + } +} + + +#endif // !DACCESS_COMPILE + +#endif // STRESS_LOG + diff --git a/src/coreclr/src/nativeaot/Runtime/strongname.cpp b/src/coreclr/src/nativeaot/Runtime/strongname.cpp new file mode 100644 index 0000000000000..5177ec086b491 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/strongname.cpp @@ -0,0 +1,44 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Unmanaged helpers for strong name parsing. +// + +#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "sha1.h" + +// +// Converts a public key into a public key token, by computing the SHA1 of the public key, then taking the last 8 bytes in reverse order. +// +// The only legal value for "cbPublicKeyTokenOut" is 8 - this parameter exists as defense in depth. +// + +#define PUBLIC_KEY_TOKEN_LEN 8 + +COOP_PINVOKE_HELPER(void, RhConvertPublicKeyToPublicKeyToken, (const UInt8* pbPublicKey, int cbPublicKey, UInt8 *pbPublicKeyTokenOut, int cbPublicKeyTokenOut)) +{ + ASSERT(pbPublicKey != NULL); + ASSERT(pbPublicKeyTokenOut != NULL); + + if (cbPublicKeyTokenOut != PUBLIC_KEY_TOKEN_LEN) + { + RhFailFast(); + } + + SHA1Hash sha1; + sha1.AddData(pbPublicKey, cbPublicKey); + UInt8* pHash = sha1.GetHash(); + + for (int i = 0; i < PUBLIC_KEY_TOKEN_LEN; i++) + { + pbPublicKeyTokenOut[i] = pHash[SHA1_HASH_SIZE - i - 1]; + } + + return; +} + diff --git a/src/coreclr/src/nativeaot/Runtime/thread.cpp b/src/coreclr/src/nativeaot/Runtime/thread.cpp new file mode 100644 index 0000000000000..3a4414b9a35f4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/thread.cpp @@ -0,0 +1,1425 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+#include "common.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "Crst.h" +#include "event.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "thread.inl" +#include "RuntimeInstance.h" +#include "shash.h" +#include "rhbinder.h" +#include "stressLog.h" +#include "RhConfig.h" + +#ifndef DACCESS_COMPILE + +EXTERN_C REDHAWK_API void* REDHAWK_CALLCONV RhpHandleAlloc(void* pObject, int type); +EXTERN_C REDHAWK_API void REDHAWK_CALLCONV RhHandleSet(void* handle, void* pObject); +EXTERN_C REDHAWK_API void REDHAWK_CALLCONV RhHandleFree(void* handle); + +static int (*g_RuntimeInitializationCallback)(); +static Thread* g_RuntimeInitializingThread; + +#ifdef _MSC_VER +extern "C" void _ReadWriteBarrier(void); +#pragma intrinsic(_ReadWriteBarrier) +#else // _MSC_VER +#define _ReadWriteBarrier() __asm__ volatile("" : : : "memory") +#endif // _MSC_VER +#endif //!DACCESS_COMPILE + +PTR_VOID Thread::GetTransitionFrame() +{ + if (ThreadStore::GetSuspendingThread() == this) + { + // This thread is in cooperative mode, so we grab the transition frame + // from the 'tunnel' location, which will have the frame from the most + // recent 'cooperative pinvoke' transition that brought us here. + ASSERT(m_pHackPInvokeTunnel != NULL); + return m_pHackPInvokeTunnel; + } + + ASSERT(m_pCachedTransitionFrame != NULL); + return m_pCachedTransitionFrame; +} + +#ifndef DACCESS_COMPILE + +PTR_VOID Thread::GetTransitionFrameForStackTrace() +{ + ASSERT_MSG(ThreadStore::GetSuspendingThread() == NULL, "Not allowed when suspended for GC."); + ASSERT_MSG(this == ThreadStore::GetCurrentThread(), "Only supported for current thread."); + ASSERT(Thread::IsCurrentThreadInCooperativeMode()); + ASSERT(m_pHackPInvokeTunnel != NULL); + return m_pHackPInvokeTunnel; +} + +void Thread::WaitForSuspend() +{ + Unhijack(); + GetThreadStore()->WaitForSuspendComplete(); +} + +void Thread::WaitForGC(void * pTransitionFrame) +{ + ASSERT(!IsDoNotTriggerGcSet()); + + do + { + m_pTransitionFrame = pTransitionFrame; + + Unhijack(); + RedhawkGCInterface::WaitForGCCompletion(); + + m_pTransitionFrame = NULL; + + // We need to prevent compiler reordering between above write and below read. + _ReadWriteBarrier(); + } + while (ThreadStore::IsTrapThreadsRequested()); +} + +// +// This is used by the suspension code when driving all threads to unmanaged code. It is performed after +// the FlushProcessWriteBuffers call so that we know that once the thread reaches unmanaged code, it won't +// reenter managed code. Therefore, the m_pTransitionFrame is stable. Except that it isn't. The return-to- +// managed sequence will temporarily overwrite the m_pTransitionFrame to be 0. As a result, we need to cache +// the non-zero m_pTransitionFrame value that we saw during suspend so that stackwalks can read this value +// without concern of sometimes reading a 0, as would be the case if they read m_pTransitionFrame directly. +// +// Returns true if it sucessfully cached the transition frame (i.e. the thread was in unmanaged). +// Returns false otherwise. 
+// +bool Thread::CacheTransitionFrameForSuspend() +{ + if (m_pCachedTransitionFrame != NULL) + return true; + + PTR_VOID temp = m_pTransitionFrame; // volatile read + if (temp == NULL) + return false; + + m_pCachedTransitionFrame = temp; + return true; +} + +void Thread::ResetCachedTransitionFrame() +{ + // @TODO: I don't understand this assert because ResumeAllThreads is clearly written + // to be reseting other threads' cached transition frames. + + //ASSERT((ThreadStore::GetCurrentThreadIfAvailable() == this) || + // (m_pCachedTransitionFrame != NULL)); + m_pCachedTransitionFrame = NULL; +} + +// This function simulates a PInvoke transition using a frame pointer from somewhere further up the stack that +// was passed in via the m_pHackPInvokeTunnel field. It is used to allow us to grandfather-in the set of GC +// code that runs in cooperative mode without having to rewrite it in managed code. The result is that the +// code that calls into this special mode must spill preserved registers as if it's going to PInvoke, but +// record its transition frame pointer in m_pHackPInvokeTunnel and leave the thread in the cooperative +// mode. Later on, when this function is called, we effect the state transition to 'unmanaged' using the +// previously setup transition frame. +void Thread::EnablePreemptiveMode() +{ + ASSERT(ThreadStore::GetCurrentThread() == this); +#if !defined(HOST_WASM) + ASSERT(m_pHackPInvokeTunnel != NULL); +#endif + + Unhijack(); + + // ORDERING -- this write must occur before checking the trap + m_pTransitionFrame = m_pHackPInvokeTunnel; + + // We need to prevent compiler reordering between above write and below read. Both the read and the write + // are volatile, so it's possible that the particular semantic for volatile that MSVC provides is enough, + // but if not, this barrier would be required. If so, it won't change anything to add the barrier. + _ReadWriteBarrier(); + + if (ThreadStore::IsTrapThreadsRequested()) + { + WaitForSuspend(); + } +} + +void Thread::DisablePreemptiveMode() +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + + // ORDERING -- this write must occur before checking the trap + m_pTransitionFrame = NULL; + + // We need to prevent compiler reordering between above write and below read. Both the read and the write + // are volatile, so it's possible that the particular semantic for volatile that MSVC provides is enough, + // but if not, this barrier would be required. If so, it won't change anything to add the barrier. + _ReadWriteBarrier(); + + if (ThreadStore::IsTrapThreadsRequested() && (this != ThreadStore::GetSuspendingThread())) + { + WaitForGC(m_pHackPInvokeTunnel); + } +} +#endif // !DACCESS_COMPILE + +bool Thread::IsCurrentThreadInCooperativeMode() +{ +#ifndef DACCESS_COMPILE + ASSERT(ThreadStore::GetCurrentThread() == this); +#endif // !DACCESS_COMPILE + return (m_pTransitionFrame == NULL); +} + +// +// This is used by the EH system to find the place where execution left managed code when an exception leaks out of a +// pinvoke and we need to FailFast via the appropriate class library. +// +// May only be used from the same thread and while in preemptive mode with an active pinvoke on the stack. 
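+// (The address is read from the m_RIP field of the PInvokeTransitionFrame currently recorded in
+// m_pTransitionFrame.)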
+// +#ifndef DACCESS_COMPILE +void * Thread::GetCurrentThreadPInvokeReturnAddress() +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + ASSERT(!IsCurrentThreadInCooperativeMode()); + return ((PInvokeTransitionFrame*)m_pTransitionFrame)->m_RIP; +} +#endif // !DACCESS_COMPILE + + + +PTR_UInt8 Thread::GetTEB() +{ + return m_pTEB; +} + +#ifndef DACCESS_COMPILE +void Thread::SetThreadStressLog(void * ptsl) +{ + m_pThreadStressLog = ptsl; +} +#endif // DACCESS_COMPILE + +PTR_VOID Thread::GetThreadStressLog() const +{ + return m_pThreadStressLog; +} + +#if defined(FEATURE_GC_STRESS) & !defined(DACCESS_COMPILE) +void Thread::SetRandomSeed(UInt32 seed) +{ + ASSERT(!IsStateSet(TSF_IsRandSeedSet)); + m_uRand = seed; + SetState(TSF_IsRandSeedSet); +} + +// Generates pseudo random numbers in the range [0, 2^31) +// using only multiplication and addition +UInt32 Thread::NextRand() +{ + // Uses Carta's algorithm for Park-Miller's PRNG: + // x_{k+1} = 16807 * x_{k} mod (2^31-1) + + UInt32 hi,lo; + + // (high word of seed) * 16807 - at most 31 bits + hi = 16807 * (m_uRand >> 16); + // (low word of seed) * 16807 - at most 31 bits + lo = 16807 * (m_uRand & 0xFFFF); + + // Proof that below operations (multiplication and addition only) + // are equivalent to the original formula: + // x_{k+1} = 16807 * x_{k} mod (2^31-1) + // We denote hi2 as the low 15 bits in hi, + // and hi1 as the remaining 16 bits in hi: + // (hi * 2^16 + lo) mod (2^31-1) = + // ((hi1 * 2^15 + hi2) * 2^16 + lo) mod (2^31-1) = + // ( hi1 * 2^31 + hi2 * 2^16 + lo) mod (2^31-1) = + // ( hi1 * (2^31-1) + hi1 + hi2 * 2^16 + lo) mod (2^31-1) = + // ( hi2 * 2^16 + hi1 + lo ) mod (2^31-1) + + // lo + (hi2 * 2^16) + lo += (hi & 0x7FFF) << 16; + // lo + (hi2 * 2^16) + hi1 + lo += (hi >> 15); + // modulo (2^31-1) + if (lo > 0x7fffFFFF) + lo -= 0x7fffFFFF; + + m_uRand = lo; + + return m_uRand; +} + +bool Thread::IsRandInited() +{ + return IsStateSet(TSF_IsRandSeedSet); +} +#endif // FEATURE_GC_STRESS & !DACCESS_COMPILE + +PTR_ExInfo Thread::GetCurExInfo() +{ + ValidateExInfoStack(); + return m_pExInfoStackHead; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifndef DACCESS_COMPILE + +void Thread::Construct() +{ +#ifndef USE_PORTABLE_HELPERS + C_ASSERT(OFFSETOF__Thread__m_pTransitionFrame == + (offsetof(Thread, m_pTransitionFrame))); +#endif // USE_PORTABLE_HELPERS + + m_numDynamicTypesTlsCells = 0; + m_pDynamicTypesTlsCells = NULL; + + m_pThreadLocalModuleStatics = NULL; + m_numThreadLocalModuleStatics = 0; + + // NOTE: We do not explicitly defer to the GC implementation to initialize the alloc_context. The + // alloc_context will be initialized to 0 via the static initialization of tls_CurrentThread. If the + // alloc_context ever needs different initialization, a matching change to the tls_CurrentThread + // static initialization will need to be made. + + m_uPalThreadIdForLogging = PalGetCurrentThreadIdForLogging(); + m_threadId.SetToCurrentThread(); + + HANDLE curProcessPseudo = PalGetCurrentProcess(); + HANDLE curThreadPseudo = PalGetCurrentThread(); + + // This can fail! Users of m_hPalThread must be able to handle INVALID_HANDLE_VALUE!! 
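+    // (Thread::Hijack, for example, checks for INVALID_HANDLE_VALUE and simply declines to
+    // hijack such a thread.)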
+ PalDuplicateHandle(curProcessPseudo, curThreadPseudo, curProcessPseudo, &m_hPalThread, + 0, // ignored + FALSE, // inherit + DUPLICATE_SAME_ACCESS); + + if (!PalGetMaximumStackBounds(&m_pStackLow, &m_pStackHigh)) + RhFailFast(); + + m_pTEB = PalNtCurrentTeb(); + +#ifdef STRESS_LOG + if (StressLog::StressLogOn(~0u, 0)) + m_pThreadStressLog = StressLog::CreateThreadStressLog(this); +#endif // STRESS_LOG + + m_threadAbortException = NULL; +} + +bool Thread::IsInitialized() +{ + return (m_ThreadStateFlags != TSF_Unknown); +} + +// ----------------------------------------------------------------------------------------------------------- +// GC support APIs - do not use except from GC itself +// +void Thread::SetGCSpecial(bool isGCSpecial) +{ + if (!IsInitialized()) + Construct(); + if (isGCSpecial) + SetState(TSF_IsGcSpecialThread); + else + ClearState(TSF_IsGcSpecialThread); +} + +bool Thread::IsGCSpecial() +{ + return IsStateSet(TSF_IsGcSpecialThread); +} + +bool Thread::CatchAtSafePoint() +{ + // This is only called by the GC on a background GC worker thread that's explicitly interested in letting + // a foreground GC proceed at that point. So it's always safe to return true. + ASSERT(IsGCSpecial()); + return true; +} + +UInt64 Thread::GetPalThreadIdForLogging() +{ + return m_uPalThreadIdForLogging; +} + +bool Thread::IsCurrentThread() +{ + return m_threadId.IsCurrentThread(); +} + +void Thread::Destroy() +{ + if (m_hPalThread != INVALID_HANDLE_VALUE) + PalCloseHandle(m_hPalThread); + + if (m_pDynamicTypesTlsCells != NULL) + { + for (UInt32 i = 0; i < m_numDynamicTypesTlsCells; i++) + { + if (m_pDynamicTypesTlsCells[i] != NULL) + delete[] m_pDynamicTypesTlsCells[i]; + } + delete[] m_pDynamicTypesTlsCells; + } + + if (m_pThreadLocalModuleStatics != NULL) + { + for (UInt32 i = 0; i < m_numThreadLocalModuleStatics; i++) + { + if (m_pThreadLocalModuleStatics[i] != NULL) + { + RhHandleFree(m_pThreadLocalModuleStatics[i]); + } + } + delete[] m_pThreadLocalModuleStatics; + } + + RedhawkGCInterface::ReleaseAllocContext(GetAllocContext()); + + // Thread::Destroy is called when the thread's "home" fiber dies. We mark the thread as "detached" here + // so that we can validate, in our DLL_THREAD_DETACH handler, that the thread was already destroyed at that + // point. 
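+    // (See DllThreadDetach in startup.cpp, which fail-fasts if it finds a thread that has not
+    // been detached, unless process shutdown has already begun.)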
+ SetDetached(); +} + +#ifdef HOST_WASM +extern RtuObjectRef * t_pShadowStackTop; +extern RtuObjectRef * t_pShadowStackBottom; + +void GcScanWasmShadowStack(void * pfnEnumCallback, void * pvCallbackData) +{ + // Wasm does not permit iteration of stack frames so is uses a shadow stack instead + RedhawkGCInterface::EnumGcRefsInRegionConservatively(t_pShadowStackBottom, t_pShadowStackTop, pfnEnumCallback, pvCallbackData); +} +#endif + +void Thread::GcScanRoots(void * pfnEnumCallback, void * pvCallbackData) +{ +#ifdef HOST_WASM + GcScanWasmShadowStack(pfnEnumCallback, pvCallbackData); +#else + StackFrameIterator frameIterator(this, GetTransitionFrame()); + GcScanRootsWorker(pfnEnumCallback, pvCallbackData, frameIterator); +#endif +} + +#endif // !DACCESS_COMPILE + +#ifdef DACCESS_COMPILE +// A trivial wrapper that unpacks the DacScanCallbackData and calls the callback provided to GcScanRoots +void GcScanRootsCallbackWrapper(PTR_RtuObjectRef ppObject, DacScanCallbackData* callbackData, UInt32 flags) +{ + Thread::GcScanRootsCallbackFunc * pfnUserCallback = (Thread::GcScanRootsCallbackFunc *)callbackData->pfnUserCallback; + pfnUserCallback(ppObject, callbackData->token, flags); +} + +bool Thread::GcScanRoots(GcScanRootsCallbackFunc * pfnEnumCallback, void * token, PTR_PAL_LIMITED_CONTEXT pInitialContext) +{ + DacScanCallbackData callbackDataWrapper; + callbackDataWrapper.thread_under_crawl = this; + callbackDataWrapper.promotion = true; + callbackDataWrapper.token = token; + callbackDataWrapper.pfnUserCallback = pfnEnumCallback; + //When debugging we might be trying to enumerate with or without a transition frame + //on top of the stack. If there is one use it, otherwise the debugger provides a set of initial registers + //to use. + PTR_VOID pTransitionFrame = GetTransitionFrame(); + if(pTransitionFrame != NULL) + { + StackFrameIterator frameIterator(this, GetTransitionFrame()); + GcScanRootsWorker(&GcScanRootsCallbackWrapper, &callbackDataWrapper, frameIterator); + } + else + { + if(pInitialContext == NULL) + return false; + StackFrameIterator frameIterator(this, pInitialContext); + GcScanRootsWorker(&GcScanRootsCallbackWrapper, &callbackDataWrapper, frameIterator); + } + return true; +} +#endif //DACCESS_COMPILE + +void Thread::GcScanRootsWorker(void * pfnEnumCallback, void * pvCallbackData, StackFrameIterator & frameIterator) +{ + PTR_RtuObjectRef pHijackedReturnValue = NULL; + GCRefKind returnValueKind = GCRK_Unknown; + + if (frameIterator.GetHijackedReturnValueLocation(&pHijackedReturnValue, &returnValueKind)) + { +#ifdef TARGET_ARM64 + GCRefKind reg0Kind = ExtractReg0ReturnKind(returnValueKind); + GCRefKind reg1Kind = ExtractReg1ReturnKind(returnValueKind); + + // X0 and X1 are saved next to each other in this order + if (reg0Kind != GCRK_Scalar) + { + RedhawkGCInterface::EnumGcRef(pHijackedReturnValue, reg0Kind, pfnEnumCallback, pvCallbackData); + } + if (reg1Kind != GCRK_Scalar) + { + RedhawkGCInterface::EnumGcRef(pHijackedReturnValue + 1, reg1Kind, pfnEnumCallback, pvCallbackData); + } +#else + RedhawkGCInterface::EnumGcRef(pHijackedReturnValue, returnValueKind, pfnEnumCallback, pvCallbackData); +#endif + } + +#ifndef DACCESS_COMPILE + if (GetRuntimeInstance()->IsConservativeStackReportingEnabled()) + { + if (frameIterator.IsValid()) + { + PTR_VOID pLowerBound = dac_cast(frameIterator.GetRegisterSet()->GetSP()); + + // Transition frame may contain callee saved registers that need to be reported as well + PTR_VOID pTransitionFrame = GetTransitionFrame(); + ASSERT(pTransitionFrame != 
NULL); + if (pTransitionFrame < pLowerBound) + pLowerBound = pTransitionFrame; + + PTR_VOID pUpperBound = m_pStackHigh; + + RedhawkGCInterface::EnumGcRefsInRegionConservatively( + dac_cast(pLowerBound), + dac_cast(pUpperBound), + pfnEnumCallback, + pvCallbackData); + } + } + else +#endif // !DACCESS_COMPILE + { + while (frameIterator.IsValid()) + { + frameIterator.CalculateCurrentMethodState(); + + STRESS_LOG1(LF_GCROOTS, LL_INFO1000, "Scanning method %pK\n", (void*)frameIterator.GetRegisterSet()->IP); + + if (!frameIterator.ShouldSkipRegularGcReporting()) + { + RedhawkGCInterface::EnumGcRefs(frameIterator.GetCodeManager(), + frameIterator.GetMethodInfo(), + frameIterator.GetEffectiveSafePointAddress(), + frameIterator.GetRegisterSet(), + pfnEnumCallback, + pvCallbackData); + } + + // Each enumerated frame (including the first one) may have an associated stack range we need to + // report conservatively (every pointer aligned value that looks like it might be a GC reference is + // reported as a pinned interior reference). This occurs in an edge case where a managed method whose + // signature the runtime is not aware of calls into the runtime which subsequently calls back out + // into managed code (allowing the possibility of a garbage collection). This can happen in certain + // interface invocation slow paths for instance. Since the original managed call may have passed GC + // references which are unreported by any managed method on the stack at the time of the GC we + // identify (again conservatively) the range of the stack that might contain these references and + // report everything. Since it should be a very rare occurrence indeed that we actually have to do + // this this, it's considered a better trade-off than storing signature metadata for every potential + // callsite of the type described above. + if (frameIterator.HasStackRangeToReportConservatively()) + { + PTR_RtuObjectRef pLowerBound; + PTR_RtuObjectRef pUpperBound; + frameIterator.GetStackRangeToReportConservatively(&pLowerBound, &pUpperBound); + RedhawkGCInterface::EnumGcRefsInRegionConservatively(pLowerBound, + pUpperBound, + pfnEnumCallback, + pvCallbackData); + } + + frameIterator.Next(); + } + } + + // ExInfos hold exception objects that are not reported by anyone else. In fact, sometimes they are in + // logically dead parts of the stack that the typical GC stackwalk skips. (This happens in the case where + // one exception dispatch superseded a previous one.) We keep them alive as long as they are in the + // ExInfo chain to aid in post-mortem debugging. SOS will access them through the DAC and the exported + // API, RhGetExceptionsForCurrentThread, will access them at runtime to gather additional information to + // add to a dump file during FailFast. 
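+    // (Each ExInfo's m_exception field is reported below as an ordinary object reference so the
+    // GC keeps the exception object alive.)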
+ for (PTR_ExInfo curExInfo = GetCurExInfo(); curExInfo != NULL; curExInfo = curExInfo->m_pPrevExInfo) + { + PTR_RtuObjectRef pExceptionObj = dac_cast(&curExInfo->m_exception); + RedhawkGCInterface::EnumGcRef(pExceptionObj, GCRK_Object, pfnEnumCallback, pvCallbackData); + } + + // Keep alive the ThreadAbortException that's stored in the target thread during thread abort + PTR_RtuObjectRef pThreadAbortExceptionObj = dac_cast(&m_threadAbortException); + RedhawkGCInterface::EnumGcRef(pThreadAbortExceptionObj, GCRK_Object, pfnEnumCallback, pvCallbackData); +} + +#ifndef DACCESS_COMPILE + +#ifndef TARGET_ARM64 +EXTERN_C void FASTCALL RhpGcProbeHijackScalar(); +EXTERN_C void FASTCALL RhpGcProbeHijackObject(); +EXTERN_C void FASTCALL RhpGcProbeHijackByref(); + +static void* NormalHijackTargets[3] = +{ + reinterpret_cast(RhpGcProbeHijackScalar), // GCRK_Scalar = 0, + reinterpret_cast(RhpGcProbeHijackObject), // GCRK_Object = 1, + reinterpret_cast(RhpGcProbeHijackByref) // GCRK_Byref = 2, +}; +#else // TARGET_ARM64 +EXTERN_C void FASTCALL RhpGcProbeHijack(); + +static void* NormalHijackTargets[1] = +{ + reinterpret_cast(RhpGcProbeHijack) +}; +#endif // TARGET_ARM64 + +#ifdef FEATURE_GC_STRESS +#ifndef TARGET_ARM64 +EXTERN_C void FASTCALL RhpGcStressHijackScalar(); +EXTERN_C void FASTCALL RhpGcStressHijackObject(); +EXTERN_C void FASTCALL RhpGcStressHijackByref(); + +static void* GcStressHijackTargets[3] = +{ + reinterpret_cast(RhpGcStressHijackScalar), // GCRK_Scalar = 0, + reinterpret_cast(RhpGcStressHijackObject), // GCRK_Object = 1, + reinterpret_cast(RhpGcStressHijackByref) // GCRK_Byref = 2, +}; +#else // TARGET_ARM64 +EXTERN_C void FASTCALL RhpGcStressHijack(); + +static void* GcStressHijackTargets[1] = +{ + reinterpret_cast(RhpGcStressHijack) +}; +#endif // TARGET_ARM64 +#endif // FEATURE_GC_STRESS + +// static +bool Thread::IsHijackTarget(void * address) +{ + for (int i = 0; i < COUNTOF(NormalHijackTargets); i++) + { + if (NormalHijackTargets[i] == address) + return true; + } +#ifdef FEATURE_GC_STRESS + for (int i = 0; i < COUNTOF(GcStressHijackTargets); i++) + { + if (GcStressHijackTargets[i] == address) + return true; + } +#endif // FEATURE_GC_STRESS + return false; +} + +bool Thread::Hijack() +{ + ASSERT(ThreadStore::GetCurrentThread() == ThreadStore::GetSuspendingThread()); + + ASSERT_MSG(ThreadStore::GetSuspendingThread() != this, "You may not hijack a thread from itself."); + + if (m_hPalThread == INVALID_HANDLE_VALUE) + { + // cannot proceed + return false; + } + + // requires THREAD_SUSPEND_RESUME / THREAD_GET_CONTEXT / THREAD_SET_CONTEXT permissions + + return PalHijack(m_hPalThread, HijackCallback, this) == 0; +} + +UInt32_BOOL Thread::HijackCallback(HANDLE /*hThread*/, PAL_LIMITED_CONTEXT* pThreadContext, void* pCallbackContext) +{ + Thread* pThread = (Thread*) pCallbackContext; + + // + // WARNING: The hijack operation will take a read lock on the RuntimeInstance's module list. + // (This is done to find a Module based on an IP.) Therefore, if the thread we've just + // suspended owns the write lock on the module list, we'll deadlock with it when we try to + // take the read lock below. So we must attempt a non-blocking acquire of the read lock + // early and fail the hijack if we can't get it. This will cause us to simply retry later. + // + if (GetRuntimeInstance()->m_ModuleListLock.DangerousTryPulseReadLock()) + { + if (pThread->CacheTransitionFrameForSuspend()) + { + // IMPORTANT: GetThreadContext should not be trusted arbitrarily. 
We are careful here to recheck + // the thread's state flag that indicates whether or not it has made it to unmanaged code. If + // it has reached unmanaged code (even our own wait helper routines), then we cannot trust the + // context returned by it. This is due to various races that occur updating the reported context + // during syscalls. + return TRUE; + } + else + { + return pThread->InternalHijack(pThreadContext, NormalHijackTargets) ? TRUE : FALSE; + } + } + + return FALSE; +} + +#ifdef FEATURE_GC_STRESS +// This is a helper called from RhpHijackForGcStress which will place a GC Stress +// hijack on this thread's call stack. This is never called from another thread. +// static +void Thread::HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx) +{ + Thread * pCurrentThread = ThreadStore::GetCurrentThread(); + + // don't hijack for GC stress if we're in a "no GC stress" region + if (pCurrentThread->IsSuppressGcStressSet()) + return; + + RuntimeInstance * pInstance = GetRuntimeInstance(); + + UIntNative ip = pSuspendCtx->GetIp(); + + bool bForceGC = g_pRhConfig->GetGcStressThrottleMode() == 0; + // we enable collecting statistics by callsite even for stochastic-only + // stress mode. this will force a stack walk, but it's worthwhile for + // collecting data (we only actually need the IP when + // (g_pRhConfig->GetGcStressThrottleMode() & 1) != 0) + if (!bForceGC) + { + StackFrameIterator sfi(pCurrentThread, pSuspendCtx); + if (sfi.IsValid()) + { + pCurrentThread->Unhijack(); + sfi.CalculateCurrentMethodState(); + // unwind to method below the one whose epilog set up the hijack + sfi.Next(); + if (sfi.IsValid()) + { + ip = sfi.GetRegisterSet()->GetIP(); + } + } + } + if (bForceGC || pInstance->ShouldHijackCallsiteForGcStress(ip)) + { + pCurrentThread->InternalHijack(pSuspendCtx, GcStressHijackTargets); + } +} +#endif // FEATURE_GC_STRESS + +// This function is called in one of two scenarios: +// 1) from a thread to place a return hijack onto its own stack. This is only done for GC stress cases +// via Thread::HijackForGcStress above. +// 2) from another thread to place a return hijack onto this thread's stack. In this case the target +// thread is OS suspended someplace in managed code. The only constraint on the suspension is that the +// stack be crawlable enough to yield the location of the return address. +bool Thread::InternalHijack(PAL_LIMITED_CONTEXT * pSuspendCtx, void * pvHijackTargets[]) +{ + bool fSuccess = false; + + if (IsDoNotTriggerGcSet()) + return false; + + StackFrameIterator frameIterator(this, pSuspendCtx); + + if (frameIterator.IsValid()) + { + frameIterator.CalculateCurrentMethodState(); + + frameIterator.GetCodeManager()->UnsynchronizedHijackMethodLoops(frameIterator.GetMethodInfo()); + + PTR_PTR_VOID ppvRetAddrLocation; + GCRefKind retValueKind; + + if (frameIterator.GetCodeManager()->GetReturnAddressHijackInfo(frameIterator.GetMethodInfo(), + frameIterator.GetRegisterSet(), + &ppvRetAddrLocation, + &retValueKind)) + { + // ARM64 epilogs have a window between loading the hijackable return address into LR and the RET instruction. + // We cannot hijack or unhijack a thread while it is suspended in that window unless we implement hijacking + // via LR register modification. Therefore it is important to check our ability to hijack the thread before + // unhijacking it. 
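+ // Conceptually, installing the hijack is just a return-address swap (illustrative summary of
+ // the code below):
+ //
+ //     m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation;   // remember the slot
+ //     m_pvHijackedReturnAddress          = *ppvRetAddrLocation;  // remember the real return address
+ //     *ppvRetAddrLocation                = hijack stub chosen by the return value's GCRefKind;
+ //
+ // UnhijackWorker simply writes the saved value back, which is why a hijack and an unhijack of
+ // the same thread must never run concurrently.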
+ CrossThreadUnhijack(); + + void* pvRetAddr = *ppvRetAddrLocation; + ASSERT(ppvRetAddrLocation != NULL); + ASSERT(pvRetAddr != NULL); + + ASSERT(StackFrameIterator::IsValidReturnAddress(pvRetAddr)); + + m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation; + m_pvHijackedReturnAddress = pvRetAddr; +#ifdef TARGET_ARM64 + m_uHijackedReturnValueFlags = ReturnKindToTransitionFrameFlags(retValueKind); + *ppvRetAddrLocation = pvHijackTargets[0]; +#else + void* pvHijackTarget = pvHijackTargets[retValueKind]; + ASSERT_MSG(IsHijackTarget(pvHijackTarget), "unexpected method used as hijack target"); + *ppvRetAddrLocation = pvHijackTarget; +#endif + fSuccess = true; + } + } + + STRESS_LOG3(LF_STACKWALK, LL_INFO10000, "InternalHijack: TgtThread = %llx, IP = %p, result = %d\n", + GetPalThreadIdForLogging(), pSuspendCtx->GetIp(), fSuccess); + + return fSuccess; +} + +// This is the standard Unhijack, which is only allowed to be called on your own thread. +// Note that all the asm-implemented Unhijacks should also only be operating on their +// own thread. +void Thread::Unhijack() +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + UnhijackWorker(); +} + +// This unhijack routine is only called from Thread::InternalHijack() to undo a possibly existing +// hijack before placing a new one. Although there are many code sequences (here and in asm) to +// perform an unhijack operation, they will never execute concurrently. A thread may unhijack itself +// at any time so long as it does so from unmanaged code. This ensures that another thread will not +// suspend it and attempt to unhijack it, since we only suspend threads that are executing managed +// code. +void Thread::CrossThreadUnhijack() +{ + ASSERT((ThreadStore::GetCurrentThread() == this) || DebugIsSuspended()); + UnhijackWorker(); +} + +// This is the hijack worker routine which merely implements the hijack mechanism. +// DO NOT USE DIRECTLY. Use Unhijack() or CrossThreadUnhijack() instead. +void Thread::UnhijackWorker() +{ + if (m_pvHijackedReturnAddress == NULL) + { + ASSERT(m_ppvHijackedReturnAddressLocation == NULL); + return; + } + + // Restore the original return address. + ASSERT(m_ppvHijackedReturnAddressLocation != NULL); + *m_ppvHijackedReturnAddressLocation = m_pvHijackedReturnAddress; + + // Clear the hijack state. + m_ppvHijackedReturnAddressLocation = NULL; + m_pvHijackedReturnAddress = NULL; +#ifdef TARGET_ARM64 + m_uHijackedReturnValueFlags = 0; +#endif +} + +#if _DEBUG +bool Thread::DebugIsSuspended() +{ + ASSERT(ThreadStore::GetCurrentThread() != this); +#if 0 + PalSuspendThread(m_hPalThread); + UInt32 suspendCount = PalResumeThread(m_hPalThread); + return (suspendCount > 0); +#else + // @TODO: I don't trust the above implementation, so I want to implement this myself + // by marking the thread state as "yes, we suspended it" and checking that state here. + return true; +#endif +} +#endif + +// @TODO: it would be very, very nice if we did not have to bleed knowledge of hijacking +// and hijack state to other components in the runtime. For now, these are only used +// when getting EH info during exception dispatch. We should find a better way to encapsulate +// this. +bool Thread::IsHijacked() +{ + // Note: this operation is only valid from the current thread. If one thread invokes + // this on another then it may be racing with other changes to the thread's hijack state. 
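+ // Cross-thread queries must instead go through DangerousCrossThreadIsHijacked below, which is
+ // only legal once all threads have been suspended for a stackwalk.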
+ ASSERT(ThreadStore::GetCurrentThread() == this); + + return m_pvHijackedReturnAddress != NULL; +} + +// +// WARNING: This method must ONLY be called during stackwalks when we believe that all threads are +// synchronized and there is no other thread racing with us trying to apply hijacks. +// +bool Thread::DangerousCrossThreadIsHijacked() +{ + // If we have a CachedTransitionFrame available, then we're in the proper state. Otherwise, this method + // was called from an improper state. + ASSERT(GetTransitionFrame() != NULL); + return m_pvHijackedReturnAddress != NULL; +} + +void * Thread::GetHijackedReturnAddress() +{ + // Note: this operation is only valid from the current thread. If one thread invokes + // this on another then it may be racing with other changes to the thread's hijack state. + ASSERT(IsHijacked()); + ASSERT(ThreadStore::GetCurrentThread() == this); + + return m_pvHijackedReturnAddress; +} + +void * Thread::GetUnhijackedReturnAddress(void ** ppvReturnAddressLocation) +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + + void * pvReturnAddress; + if (m_ppvHijackedReturnAddressLocation == ppvReturnAddressLocation) + pvReturnAddress = m_pvHijackedReturnAddress; + else + pvReturnAddress = *ppvReturnAddressLocation; + + ASSERT(NULL != GetRuntimeInstance()->FindCodeManagerByAddress(pvReturnAddress)); + return pvReturnAddress; +} + +void Thread::SetState(ThreadStateFlags flags) +{ + PalInterlockedOr(&m_ThreadStateFlags, flags); +} + +void Thread::ClearState(ThreadStateFlags flags) +{ + PalInterlockedAnd(&m_ThreadStateFlags, ~flags); +} + +bool Thread::IsStateSet(ThreadStateFlags flags) +{ + return ((m_ThreadStateFlags & flags) == (UInt32) flags); +} + +bool Thread::IsSuppressGcStressSet() +{ + return IsStateSet(TSF_SuppressGcStress); +} + +void Thread::SetSuppressGcStress() +{ + ASSERT(!IsStateSet(TSF_SuppressGcStress)); + SetState(TSF_SuppressGcStress); +} + +void Thread::ClearSuppressGcStress() +{ + ASSERT(IsStateSet(TSF_SuppressGcStress)); + ClearState(TSF_SuppressGcStress); +} + +#endif //!DACCESS_COMPILE + +#ifndef DACCESS_COMPILE +#ifdef FEATURE_GC_STRESS +#ifdef HOST_X86 // the others are implemented in assembly code to avoid trashing the argument registers +EXTERN_C void FASTCALL RhpSuppressGcStress() +{ + ThreadStore::GetCurrentThread()->SetSuppressGcStress(); +} +#endif // HOST_X86 + +EXTERN_C void FASTCALL RhpUnsuppressGcStress() +{ + ThreadStore::GetCurrentThread()->ClearSuppressGcStress(); +} +#else +EXTERN_C void FASTCALL RhpSuppressGcStress() +{ +} +EXTERN_C void FASTCALL RhpUnsuppressGcStress() +{ +} +#endif // FEATURE_GC_STRESS + +// Standard calling convention variant and actual implementation for RhpWaitForSuspend +EXTERN_C NOINLINE void FASTCALL RhpWaitForSuspend2() +{ + // The wait operation below may trash the last win32 error. We save the error here so that it can be + // restored after the wait operation; + Int32 lastErrorOnEntry = PalGetLastError(); + + ThreadStore::GetCurrentThread()->WaitForSuspend(); + + // Restore the saved error + PalSetLastError(lastErrorOnEntry); +} + +// Standard calling convention variant and actual implementation for RhpWaitForGC +EXTERN_C NOINLINE void FASTCALL RhpWaitForGC2(PInvokeTransitionFrame * pFrame) +{ + + Thread * pThread = pFrame->m_pThread; + + if (pThread->IsDoNotTriggerGcSet()) + return; + + // The wait operation below may trash the last win32 error. 
We save the error here so that it can be + // restored after the wait operation; + Int32 lastErrorOnEntry = PalGetLastError(); + + pThread->WaitForGC(pFrame); + + // Restore the saved error + PalSetLastError(lastErrorOnEntry); +} + +void Thread::PushExInfo(ExInfo * pExInfo) +{ + ValidateExInfoStack(); + + pExInfo->m_pPrevExInfo = m_pExInfoStackHead; + m_pExInfoStackHead = pExInfo; +} + +void Thread::ValidateExInfoPop(ExInfo * pExInfo, void * limitSP) +{ +#ifdef _DEBUG + ValidateExInfoStack(); + ASSERT_MSG(pExInfo == m_pExInfoStackHead, "not popping the head element"); + pExInfo = pExInfo->m_pPrevExInfo; + + while (pExInfo && pExInfo < limitSP) + { + ASSERT_MSG(pExInfo->m_kind & EK_SupersededFlag, "popping a non-superseded ExInfo"); + pExInfo = pExInfo->m_pPrevExInfo; + } +#else + UNREFERENCED_PARAMETER(pExInfo); + UNREFERENCED_PARAMETER(limitSP); +#endif // _DEBUG +} + +COOP_PINVOKE_HELPER(void, RhpValidateExInfoPop, (Thread * pThread, ExInfo * pExInfo, void * limitSP)) +{ + pThread->ValidateExInfoPop(pExInfo, limitSP); +} + +bool Thread::IsDoNotTriggerGcSet() +{ + return IsStateSet(TSF_DoNotTriggerGc); +} + +void Thread::SetDoNotTriggerGc() +{ + ASSERT(!IsStateSet(TSF_DoNotTriggerGc)); + SetState(TSF_DoNotTriggerGc); +} + +void Thread::ClearDoNotTriggerGc() +{ + // Allowing unmatched clears simplifies the EH dispatch code, so we do not assert anything here. + ClearState(TSF_DoNotTriggerGc); +} + +bool Thread::IsDetached() +{ + return IsStateSet(TSF_Detached); +} + +void Thread::SetDetached() +{ + ASSERT(!IsStateSet(TSF_Detached)); + SetState(TSF_Detached); +} + +#endif // !DACCESS_COMPILE + +void Thread::ValidateExInfoStack() +{ +#ifndef DACCESS_COMPILE +#ifdef _DEBUG + ExInfo temp; + + ExInfo* pCur = m_pExInfoStackHead; + while (pCur) + { + ASSERT_MSG((this != ThreadStore::GetCurrentThread()) || (pCur > &temp), "an entry in the ExInfo chain points into dead stack"); + ASSERT_MSG(pCur < m_pStackHigh, "an entry in the ExInfo chain isn't on this stack"); + pCur = pCur->m_pPrevExInfo; + } +#endif // _DEBUG +#endif // !DACCESS_COMPILE +} + + + +// Retrieve the start of the TLS storage block allocated for the given thread for a specific module identified +// by the TLS slot index allocated to that module and the offset into the OS allocated block at which +// Redhawk-specific data is stored. +PTR_UInt8 Thread::GetThreadLocalStorage(UInt32 uTlsIndex, UInt32 uTlsStartOffset) +{ +#if 0 + return (*(UInt8***)(m_pTEB + OFFSETOF__TEB__ThreadLocalStoragePointer))[uTlsIndex] + uTlsStartOffset; +#else + return (*dac_cast(dac_cast(m_pTEB) + OFFSETOF__TEB__ThreadLocalStoragePointer))[uTlsIndex] + uTlsStartOffset; +#endif +} + +PTR_UInt8 Thread::GetThreadLocalStorageForDynamicType(UInt32 uTlsTypeOffset) +{ + // Note: When called from GC root enumeration, no changes can be made by the AllocateThreadLocalStorageForDynamicType to + // the 2 variables accessed here because AllocateThreadLocalStorageForDynamicType is called in cooperative mode. + + uTlsTypeOffset &= ~DYNAMIC_TYPE_TLS_OFFSET_FLAG; + return dac_cast(uTlsTypeOffset < m_numDynamicTypesTlsCells ? 
m_pDynamicTypesTlsCells[uTlsTypeOffset] : NULL); +} + +#ifndef DACCESS_COMPILE +PTR_UInt8 Thread::AllocateThreadLocalStorageForDynamicType(UInt32 uTlsTypeOffset, UInt32 tlsStorageSize, UInt32 numTlsCells) +{ + uTlsTypeOffset &= ~DYNAMIC_TYPE_TLS_OFFSET_FLAG; + + if (m_pDynamicTypesTlsCells == NULL || m_numDynamicTypesTlsCells <= uTlsTypeOffset) + { + // Keep at least a 2x grow so that we don't have to reallocate everytime a new type with TLS statics is created + if (numTlsCells < 2 * m_numDynamicTypesTlsCells) + numTlsCells = 2 * m_numDynamicTypesTlsCells; + + PTR_UInt8* pTlsCells = new (nothrow) PTR_UInt8[numTlsCells]; + if (pTlsCells == NULL) + return NULL; + + memset(&pTlsCells[m_numDynamicTypesTlsCells], 0, sizeof(PTR_UInt8) * (numTlsCells - m_numDynamicTypesTlsCells)); + + if (m_pDynamicTypesTlsCells != NULL) + { + memcpy(pTlsCells, m_pDynamicTypesTlsCells, sizeof(PTR_UInt8) * m_numDynamicTypesTlsCells); + delete[] m_pDynamicTypesTlsCells; + } + + m_pDynamicTypesTlsCells = pTlsCells; + m_numDynamicTypesTlsCells = numTlsCells; + } + + ASSERT(uTlsTypeOffset < m_numDynamicTypesTlsCells); + + if (m_pDynamicTypesTlsCells[uTlsTypeOffset] == NULL) + { + UInt8* pTlsStorage = new (nothrow) UInt8[tlsStorageSize]; + if (pTlsStorage == NULL) + return NULL; + + // Initialize storage to 0's before returning it + memset(pTlsStorage, 0, tlsStorageSize); + + m_pDynamicTypesTlsCells[uTlsTypeOffset] = pTlsStorage; + } + + return m_pDynamicTypesTlsCells[uTlsTypeOffset]; +} + +#ifndef TARGET_UNIX +EXTERN_C REDHAWK_API UInt32 __cdecl RhCompatibleReentrantWaitAny(UInt32_BOOL alertable, UInt32 timeout, UInt32 count, HANDLE* pHandles) +{ + return PalCompatibleWaitAny(alertable, timeout, count, pHandles, /*allowReentrantWait:*/ TRUE); +} +#endif // TARGET_UNIX + +FORCEINLINE bool Thread::InlineTryFastReversePInvoke(ReversePInvokeFrame * pFrame) +{ + // Do we need to attach the thread? + if (!IsStateSet(TSF_Attached)) + return false; // thread is not attached + + // If the thread is already in cooperative mode, this is a bad transition that will be a fail fast unless we are in + // a do not trigger mode. The exception to the rule allows us to have [UnmanagedCallersOnly] methods that are called via + // the "restricted GC callouts" as well as from native, which is necessary because the methods are CCW vtable + // methods on interfaces passed to native. + if (IsCurrentThreadInCooperativeMode()) + { + if (IsDoNotTriggerGcSet()) + { + // RhpTrapThreads will always be set in this case, so we must skip that check. We must be sure to + // zero-out our 'previous transition frame' state first, however. + pFrame->m_savedPInvokeTransitionFrame = NULL; + return true; + } + + return false; // bad transition + } + + // save the previous transition frame + pFrame->m_savedPInvokeTransitionFrame = m_pTransitionFrame; + + // set our mode to cooperative + m_pTransitionFrame = NULL; + + // We need to prevent compiler reordering between above write and below read. 
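+ // This write/read pair is one half of a Dekker-style handshake with the suspending thread:
+ //
+ //     this thread:        m_pTransitionFrame = NULL;     then read ThreadStore::IsTrapThreadsRequested()
+ //     suspending thread:  set the RhpTrapThreads flag;   then read m_pTransitionFrame
+ //
+ // A compiler-only barrier suffices on this side because the suspending thread calls
+ // PalFlushProcessWriteBuffers() after setting the trap flag (see ThreadStore::SuspendAllThreads).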
+ _ReadWriteBarrier(); + + // now check if we need to trap the thread + if (ThreadStore::IsTrapThreadsRequested()) + { + // put the previous frame back (sets us back to preemptive mode) + m_pTransitionFrame = pFrame->m_savedPInvokeTransitionFrame; + return false; // need to trap the thread + } + + return true; +} + +EXTERN_C void RhSetRuntimeInitializationCallback(int (*fPtr)()) +{ + g_RuntimeInitializationCallback = fPtr; +} + +void Thread::ReversePInvokeAttachOrTrapThread(ReversePInvokeFrame * pFrame) +{ + if (!IsStateSet(TSF_Attached)) + { + if (g_RuntimeInitializationCallback != NULL && g_RuntimeInitializingThread != this) + { + EnsureRuntimeInitialized(); + } + + ThreadStore::AttachCurrentThread(); + } + + // If the thread is already in cooperative mode, this is a bad transition. + if (IsCurrentThreadInCooperativeMode()) + { + // The TSF_DoNotTriggerGc mode is handled by the fast path (InlineTryFastReversePInvoke or equivalent assembly code) + ASSERT(!IsDoNotTriggerGcSet()); + + // The platform specific assembly PInvoke helpers will route this fault to the class library inferred from the return + // address for nicer error reporting. For configurations without assembly helpers, we are going to fail fast without + // going through the class library here. + // RhpReversePInvokeBadTransition(return address); + RhFailFast(); + } + + // save the previous transition frame + pFrame->m_savedPInvokeTransitionFrame = m_pTransitionFrame; + + // set our mode to cooperative + m_pTransitionFrame = NULL; + + // We need to prevent compiler reordering between above write and below read. + _ReadWriteBarrier(); + + // now check if we need to trap the thread + if (ThreadStore::IsTrapThreadsRequested()) + { + WaitForGC(pFrame->m_savedPInvokeTransitionFrame); + } +} + +void Thread::EnsureRuntimeInitialized() +{ + while (PalInterlockedCompareExchangePointer((void *volatile *)&g_RuntimeInitializingThread, this, NULL) != NULL) + { + PalSleep(1); + } + + if (g_RuntimeInitializationCallback != NULL) + { + if (g_RuntimeInitializationCallback() != 0) + RhFailFast(); + + g_RuntimeInitializationCallback = NULL; + } + + PalInterlockedExchangePointer((void *volatile *)&g_RuntimeInitializingThread, NULL); +} + +FORCEINLINE void Thread::InlineReversePInvokeReturn(ReversePInvokeFrame * pFrame) +{ + m_pTransitionFrame = pFrame->m_savedPInvokeTransitionFrame; + if (ThreadStore::IsTrapThreadsRequested()) + { + RhpWaitForSuspend2(); + } +} + +FORCEINLINE void Thread::InlinePInvoke(PInvokeTransitionFrame * pFrame) +{ + pFrame->m_pThread = this; + // set our mode to preemptive + m_pTransitionFrame = pFrame; + + // We need to prevent compiler reordering between above write and below read. 
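+ // (Same write-then-check ordering concern as in InlineTryFastReversePInvoke above.)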
+ _ReadWriteBarrier(); + + // now check if we need to trap the thread + if (ThreadStore::IsTrapThreadsRequested()) + { + RhpWaitForSuspend2(); + } +} + +FORCEINLINE void Thread::InlinePInvokeReturn(PInvokeTransitionFrame * pFrame) +{ + m_pTransitionFrame = NULL; + if (ThreadStore::IsTrapThreadsRequested()) + { + RhpWaitForGC2(pFrame); + } +} + +Object * Thread::GetThreadAbortException() +{ + return m_threadAbortException; +} + +void Thread::SetThreadAbortException(Object *exception) +{ + m_threadAbortException = exception; +} + +COOP_PINVOKE_HELPER(Object *, RhpGetThreadAbortException, ()) +{ + Thread * pCurThread = ThreadStore::RawGetCurrentThread(); + return pCurThread->GetThreadAbortException(); +} + +Object* Thread::GetThreadStaticStorageForModule(UInt32 moduleIndex) +{ + // Return a pointer to the TLS storage if it has already been + // allocated for the specified module. + if (moduleIndex < m_numThreadLocalModuleStatics) + { + Object** threadStaticsStorageHandle = (Object**)m_pThreadLocalModuleStatics[moduleIndex]; + if (threadStaticsStorageHandle != NULL) + { + return *threadStaticsStorageHandle; + } + } + + return NULL; +} + +Boolean Thread::SetThreadStaticStorageForModule(Object * pStorage, UInt32 moduleIndex) +{ + // Grow thread local storage if needed. + if (m_numThreadLocalModuleStatics <= moduleIndex) + { + UInt32 newSize = moduleIndex + 1; + if (newSize < moduleIndex) + { + return FALSE; + } + + PTR_PTR_VOID pThreadLocalModuleStatics = new (nothrow) PTR_VOID[newSize]; + if (pThreadLocalModuleStatics == NULL) + { + return FALSE; + } + + memset(&pThreadLocalModuleStatics[m_numThreadLocalModuleStatics], 0, sizeof(PTR_VOID) * (newSize - m_numThreadLocalModuleStatics)); + + if (m_pThreadLocalModuleStatics != NULL) + { + memcpy(pThreadLocalModuleStatics, m_pThreadLocalModuleStatics, sizeof(PTR_VOID) * m_numThreadLocalModuleStatics); + delete[] m_pThreadLocalModuleStatics; + } + + m_pThreadLocalModuleStatics = pThreadLocalModuleStatics; + m_numThreadLocalModuleStatics = newSize; + } + + if (m_pThreadLocalModuleStatics[moduleIndex] != NULL) + { + RhHandleSet(m_pThreadLocalModuleStatics[moduleIndex], pStorage); + } + else + { + void* threadStaticsStorageHandle = RhpHandleAlloc(pStorage, 2 /* Normal */); + if (threadStaticsStorageHandle == NULL) + { + return FALSE; + } + m_pThreadLocalModuleStatics[moduleIndex] = threadStaticsStorageHandle; + } + + return TRUE; +} + +COOP_PINVOKE_HELPER(Object*, RhGetThreadStaticStorageForModule, (UInt32 moduleIndex)) +{ + Thread * pCurrentThread = ThreadStore::RawGetCurrentThread(); + return pCurrentThread->GetThreadStaticStorageForModule(moduleIndex); +} + +COOP_PINVOKE_HELPER(Boolean, RhSetThreadStaticStorageForModule, (Array * pStorage, UInt32 moduleIndex)) +{ + Thread * pCurrentThread = ThreadStore::RawGetCurrentThread(); + return pCurrentThread->SetThreadStaticStorageForModule((Object*)pStorage, moduleIndex); +} + +// This is function is used to quickly query a value that can uniquely identify a thread +COOP_PINVOKE_HELPER(UInt8*, RhCurrentNativeThreadId, ()) +{ +#ifndef TARGET_UNIX + return PalNtCurrentTeb(); +#else + return (UInt8*)ThreadStore::RawGetCurrentThread(); +#endif // TARGET_UNIX +} + +// This function is used to get the OS thread identifier for the current thread. 
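+// Unlike RhCurrentNativeThreadId above, which only needs a cheap value that uniquely identifies the
+// thread (the TEB on Windows, the Thread* itself elsewhere), this helper returns the OS-assigned
+// thread id via PalGetCurrentThreadIdForLogging and is intended for logging/diagnostics.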
+COOP_PINVOKE_HELPER(UInt64, RhCurrentOSThreadId, ()) +{ + return PalGetCurrentThreadIdForLogging(); +} + +// Standard calling convention variant and actual implementation for RhpReversePInvokeAttachOrTrapThread +EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInvokeFrame * pFrame) +{ + ASSERT(pFrame->m_savedThread == ThreadStore::RawGetCurrentThread()); + pFrame->m_savedThread->ReversePInvokeAttachOrTrapThread(pFrame); +} + +// +// PInvoke +// + +// Standard calling convention variant of RhpReversePInvoke +COOP_PINVOKE_HELPER(void, RhpReversePInvoke2, (ReversePInvokeFrame * pFrame)) +{ + Thread * pCurThread = ThreadStore::RawGetCurrentThread(); + pFrame->m_savedThread = pCurThread; + if (pCurThread->InlineTryFastReversePInvoke(pFrame)) + return; + + RhpReversePInvokeAttachOrTrapThread2(pFrame); +} + +// Standard calling convention variant of RhpReversePInvokeReturn +COOP_PINVOKE_HELPER(void, RhpReversePInvokeReturn2, (ReversePInvokeFrame * pFrame)) +{ + pFrame->m_savedThread->InlineReversePInvokeReturn(pFrame); +} + +#ifdef USE_PORTABLE_HELPERS + +COOP_PINVOKE_HELPER(void, RhpPInvoke2, (PInvokeTransitionFrame* pFrame)) +{ + Thread * pCurThread = ThreadStore::RawGetCurrentThread(); + pCurThread->InlinePInvoke(pFrame); +} + +COOP_PINVOKE_HELPER(void, RhpPInvokeReturn2, (PInvokeTransitionFrame* pFrame)) +{ + //reenter cooperative mode + pFrame->m_pThread->InlinePInvokeReturn(pFrame); +} + +#endif //USE_PORTABLE_HELPERS + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/thread.h b/src/coreclr/src/nativeaot/Runtime/thread.h new file mode 100644 index 0000000000000..50e24e27a28e6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/thread.h @@ -0,0 +1,305 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "forward_declarations.h" + +struct gc_alloc_context; +class RuntimeInstance; +class ThreadStore; +class CLREventStatic; +class Thread; + +// The offsets of some fields in the thread (in particular, m_pTransitionFrame) are known to the compiler and get +// inlined into the code. Let's make sure they don't change just because we enable/disable server GC in a particular +// runtime build. +#define KEEP_THREAD_LAYOUT_CONSTANT + +#ifndef HOST_64BIT +# if defined(FEATURE_SVR_GC) || defined(KEEP_THREAD_LAYOUT_CONSTANT) +# define SIZEOF_ALLOC_CONTEXT 40 +# else +# define SIZEOF_ALLOC_CONTEXT 28 +# endif +#else // HOST_64BIT +# if defined(FEATURE_SVR_GC) || defined(KEEP_THREAD_LAYOUT_CONSTANT) +# define SIZEOF_ALLOC_CONTEXT 56 +# else +# define SIZEOF_ALLOC_CONTEXT 40 +# endif +#endif // HOST_64BIT + +#define TOP_OF_STACK_MARKER ((PTR_VOID)(UIntNative)(IntNative)-1) + +#define DYNAMIC_TYPE_TLS_OFFSET_FLAG 0x80000000 + + +enum SyncRequestResult +{ + TryAgain, + SuccessUnmanaged, + SuccessManaged, +}; + +typedef DPTR(PAL_LIMITED_CONTEXT) PTR_PAL_LIMITED_CONTEXT; + +struct ExInfo; +typedef DPTR(ExInfo) PTR_ExInfo; + + +// Also defined in ExceptionHandling.cs, layouts must match. 
+// When adding new fields to this struct, ensure they get properly initialized in the exception handling +// assembly stubs +struct ExInfo +{ + + PTR_ExInfo m_pPrevExInfo; + PTR_PAL_LIMITED_CONTEXT m_pExContext; + PTR_Object m_exception; // actual object reference, specially reported by GcScanRootsWorker + ExKind m_kind; + UInt8 m_passNumber; + UInt32 m_idxCurClause; + StackFrameIterator m_frameIter; + volatile void* m_notifyDebuggerSP; +}; + +struct ThreadBuffer +{ + UInt8 m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; + UInt32 volatile m_ThreadStateFlags; // see Thread::ThreadStateFlags enum +#if DACCESS_COMPILE + PTR_VOID m_pTransitionFrame; +#else + PTR_VOID volatile m_pTransitionFrame; +#endif + PTR_VOID m_pHackPInvokeTunnel; // see Thread::EnablePreemptiveMode + PTR_VOID m_pCachedTransitionFrame; + PTR_Thread m_pNext; // used by ThreadStore's SList + HANDLE m_hPalThread; // WARNING: this may legitimately be INVALID_HANDLE_VALUE + void ** m_ppvHijackedReturnAddressLocation; + void * m_pvHijackedReturnAddress; +#ifdef HOST_64BIT + UIntNative m_uHijackedReturnValueFlags; // used on ARM64 only; however, ARM64 and AMD64 share field offsets +#endif // HOST_64BIT + PTR_ExInfo m_pExInfoStackHead; + Object* m_threadAbortException; // ThreadAbortException instance -set only during thread abort + PTR_VOID m_pStackLow; + PTR_VOID m_pStackHigh; + PTR_UInt8 m_pTEB; // Pointer to OS TEB structure for this thread + UInt64 m_uPalThreadIdForLogging; // @TODO: likely debug-only + EEThreadId m_threadId; + PTR_VOID m_pThreadStressLog; // pointer to head of thread's StressLogChunks +#ifdef FEATURE_GC_STRESS + UInt32 m_uRand; // current per-thread random number +#endif // FEATURE_GC_STRESS + + // Thread Statics Storage for dynamic types + UInt32 m_numDynamicTypesTlsCells; + PTR_PTR_UInt8 m_pDynamicTypesTlsCells; + + PTR_PTR_VOID m_pThreadLocalModuleStatics; + UInt32 m_numThreadLocalModuleStatics; +}; + +struct ReversePInvokeFrame +{ + void* m_savedPInvokeTransitionFrame; + Thread* m_savedThread; +}; + +class Thread : private ThreadBuffer +{ + friend class AsmOffsets; + friend struct DefaultSListTraits; + friend class ThreadStore; + IN_DAC(friend class ClrDataAccess;) + +public: + enum ThreadStateFlags + { + TSF_Unknown = 0x00000000, // Threads are created in this state + TSF_Attached = 0x00000001, // Thread was inited by first U->M transition on this thread + TSF_Detached = 0x00000002, // Thread was detached by DllMain + TSF_SuppressGcStress = 0x00000008, // Do not allow gc stress on this thread, used in DllMain + // ...and on the Finalizer thread + TSF_DoNotTriggerGc = 0x00000010, // Do not allow hijacking of this thread, also intended to + // ...be checked during allocations in debug builds. 
+ TSF_IsGcSpecialThread = 0x00000020, // Set to indicate a GC worker thread used for background GC +#ifdef FEATURE_GC_STRESS + TSF_IsRandSeedSet = 0x00000040, // set to indicate the random number generator for GCStress was inited +#endif // FEATURE_GC_STRESS + }; +private: + + void Construct(); + + void SetState(ThreadStateFlags flags); + void ClearState(ThreadStateFlags flags); + bool IsStateSet(ThreadStateFlags flags); + + static UInt32_BOOL HijackCallback(HANDLE hThread, PAL_LIMITED_CONTEXT* pThreadContext, void* pCallbackContext); + bool InternalHijack(PAL_LIMITED_CONTEXT * pSuspendCtx, void * pvHijackTargets[]); + + bool CacheTransitionFrameForSuspend(); + void ResetCachedTransitionFrame(); + void CrossThreadUnhijack(); + void UnhijackWorker(); + void EnsureRuntimeInitialized(); +#ifdef _DEBUG + bool DebugIsSuspended(); +#endif + + // + // SyncState members + // + PTR_VOID GetTransitionFrame(); + + void GcScanRootsWorker(void * pfnEnumCallback, void * pvCallbackData, StackFrameIterator & sfIter); + +public: + + + void Destroy(); + + bool IsInitialized(); + + gc_alloc_context * GetAllocContext(); // @TODO: I would prefer to not expose this in this way + +#ifndef DACCESS_COMPILE + UInt64 GetPalThreadIdForLogging(); + bool IsCurrentThread(); + + void GcScanRoots(void * pfnEnumCallback, void * pvCallbackData); +#else + typedef void GcScanRootsCallbackFunc(PTR_RtuObjectRef ppObject, void* token, UInt32 flags); + bool GcScanRoots(GcScanRootsCallbackFunc * pfnCallback, void * token, PTR_PAL_LIMITED_CONTEXT pInitialContext); +#endif + + bool Hijack(); + void Unhijack(); +#ifdef FEATURE_GC_STRESS + static void HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx); +#endif // FEATURE_GC_STRESS + bool IsHijacked(); + void * GetHijackedReturnAddress(); + void * GetUnhijackedReturnAddress(void** ppvReturnAddressLocation); + bool DangerousCrossThreadIsHijacked(); + + bool IsSuppressGcStressSet(); + void SetSuppressGcStress(); + void ClearSuppressGcStress(); + bool IsWithinStackBounds(PTR_VOID p); + + void GetStackBounds(PTR_VOID * ppStackLow, PTR_VOID * ppStackHigh); + + PTR_UInt8 AllocateThreadLocalStorageForDynamicType(UInt32 uTlsTypeOffset, UInt32 tlsStorageSize, UInt32 numTlsCells); + // mrt100 Debugger (dac) has dependencies on the GetThreadLocalStorageForDynamicType method. + PTR_UInt8 GetThreadLocalStorageForDynamicType(UInt32 uTlsTypeOffset); + PTR_UInt8 GetThreadLocalStorage(UInt32 uTlsIndex, UInt32 uTlsStartOffset); + PTR_UInt8 GetTEB(); + + void PushExInfo(ExInfo * pExInfo); + void ValidateExInfoPop(ExInfo * pExInfo, void * limitSP); + void ValidateExInfoStack(); + bool IsDoNotTriggerGcSet(); + void SetDoNotTriggerGc(); + void ClearDoNotTriggerGc(); + + bool IsDetached(); + void SetDetached(); + + PTR_VOID GetThreadStressLog() const; +#ifndef DACCESS_COMPILE + void SetThreadStressLog(void * ptsl); +#endif // DACCESS_COMPILE +#ifdef FEATURE_GC_STRESS + void SetRandomSeed(UInt32 seed); + UInt32 NextRand(); + bool IsRandInited(); +#endif // FEATURE_GC_STRESS + PTR_ExInfo GetCurExInfo(); + + bool IsCurrentThreadInCooperativeMode(); + + PTR_VOID GetTransitionFrameForStackTrace(); + void * GetCurrentThreadPInvokeReturnAddress(); + + static bool IsHijackTarget(void * address); + + // + // The set of operations used to support unmanaged code running in cooperative mode + // + void EnablePreemptiveMode(); + void DisablePreemptiveMode(); + + // Set the m_pHackPInvokeTunnel field for GC allocation helpers that setup transition frame + // in assembly code. Do not use anywhere else. 
+ void SetCurrentThreadPInvokeTunnelForGcAlloc(void * pTransitionFrame); + + // Setup the m_pHackPInvokeTunnel field for GC helpers entered via regular PInvoke. + // Do not use anywhere else. + void SetupHackPInvokeTunnel(); + + // + // GC support APIs - do not use except from GC itself + // + void SetGCSpecial(bool isGCSpecial); + bool IsGCSpecial(); + bool CatchAtSafePoint(); + + // + // Managed/unmanaged interop transitions support APIs + // + void WaitForSuspend(); + void WaitForGC(void * pTransitionFrame); + + void ReversePInvokeAttachOrTrapThread(ReversePInvokeFrame * pFrame); + + bool InlineTryFastReversePInvoke(ReversePInvokeFrame * pFrame); + void InlineReversePInvokeReturn(ReversePInvokeFrame * pFrame); + + void InlinePInvoke(PInvokeTransitionFrame * pFrame); + void InlinePInvokeReturn(PInvokeTransitionFrame * pFrame); + + Object * GetThreadAbortException(); + void SetThreadAbortException(Object *exception); + + Object* GetThreadStaticStorageForModule(UInt32 moduleIndex); + Boolean SetThreadStaticStorageForModule(Object * pStorage, UInt32 moduleIndex); +}; + +#ifndef __GCENV_BASE_INCLUDED__ +typedef DPTR(Object) PTR_Object; +typedef DPTR(PTR_Object) PTR_PTR_Object; +#endif // !__GCENV_BASE_INCLUDED__ +#ifdef DACCESS_COMPILE + +// The DAC uses DebuggerEnumGcRefContext in place of a GCCONTEXT when doing reference +// enumeration. The GC passes through additional data in the ScanContext which the debugger +// neither has nor needs. While we could refactor the GC code to make an interface +// with less coupling, that might affect perf or make integration messier. Instead +// we use some typedefs so DAC and runtime can get strong yet distinct types. + + +// Ideally we wouldn't need this wrapper, but PromoteCarefully needs access to the +// thread and a promotion field. We aren't assuming the user's token will have this data. +struct DacScanCallbackData +{ + Thread* thread_under_crawl; // the thread being scanned + bool promotion; // are we emulating the GC promote phase or relocate phase? + // different references are reported for each + void* token; // the callback data passed to GCScanRoots + void* pfnUserCallback; // the callback passed in to GcScanRoots + uintptr_t stack_limit; // Lowest point on the thread stack that the scanning logic is permitted to read +}; + +typedef DacScanCallbackData EnumGcRefScanContext; +typedef void EnumGcRefCallbackFunc(PTR_PTR_Object, EnumGcRefScanContext* callbackData, UInt32 flags); + +#else // DACCESS_COMPILE +#ifndef __GCENV_BASE_INCLUDED__ +struct ScanContext; +typedef void promote_func(PTR_PTR_Object, ScanContext*, unsigned); +#endif // !__GCENV_BASE_INCLUDED__ +typedef promote_func EnumGcRefCallbackFunc; +typedef ScanContext EnumGcRefScanContext; + +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/thread.inl b/src/coreclr/src/nativeaot/Runtime/thread.inl new file mode 100644 index 0000000000000..cf0127d135b2c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/thread.inl @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
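+
+// thread.inl : inline implementations of small Thread members declared in thread.h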
+ +#ifndef DACCESS_COMPILE +inline void Thread::SetCurrentThreadPInvokeTunnelForGcAlloc(void * pTransitionFrame) +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + ASSERT(Thread::IsCurrentThreadInCooperativeMode()); + m_pHackPInvokeTunnel = pTransitionFrame; +} + +inline void Thread::SetupHackPInvokeTunnel() +{ + ASSERT(ThreadStore::GetCurrentThread() == this); + ASSERT(!Thread::IsCurrentThreadInCooperativeMode()); + m_pHackPInvokeTunnel = m_pTransitionFrame; +} +#endif // DACCESS_COMPILE + +inline bool Thread::IsWithinStackBounds(PTR_VOID p) +{ + ASSERT((m_pStackLow != 0) && (m_pStackHigh != 0)); + return (m_pStackLow <= p) && (p < m_pStackHigh); +} + +inline void Thread::GetStackBounds(PTR_VOID * ppStackLow, PTR_VOID * ppStackHigh) +{ + ASSERT((m_pStackLow != 0) && (m_pStackHigh != 0)); + *ppStackLow = m_pStackLow; + *ppStackHigh = m_pStackHigh; +} diff --git a/src/coreclr/src/nativeaot/Runtime/threadstore.cpp b/src/coreclr/src/nativeaot/Runtime/threadstore.cpp new file mode 100644 index 0000000000000..bababb625493a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/threadstore.cpp @@ -0,0 +1,540 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" +#include "gcenv.h" +#include "gcheaputilities.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "gcrhinterface.h" +#include "varint.h" +#include "regdisplay.h" +#include "StackFrameIterator.h" +#include "thread.h" +#include "holder.h" +#include "rhbinder.h" +#include "RWLock.h" +#include "threadstore.h" +#include "threadstore.inl" +#include "RuntimeInstance.h" +#include "TargetPtrs.h" +#include "yieldprocessornormalized.h" + +#include "slist.inl" +#include "GCMemoryHelpers.h" + +#include "Debug.h" +#include "DebugEventSource.h" +#include "DebugFuncEval.h" + +EXTERN_C volatile UInt32 RhpTrapThreads = (UInt32)TrapThreadsFlags::None; + +GVAL_IMPL_INIT(PTR_Thread, RhpSuspendingThread, 0); + +ThreadStore * GetThreadStore() +{ + return GetRuntimeInstance()->GetThreadStore(); +} + +ThreadStore::Iterator::Iterator() : + m_readHolder(&GetThreadStore()->m_Lock), + m_pCurrentPosition(GetThreadStore()->m_ThreadList.GetHead()) +{ +} + +ThreadStore::Iterator::~Iterator() +{ +} + +PTR_Thread ThreadStore::Iterator::GetNext() +{ + PTR_Thread pResult = m_pCurrentPosition; + if (NULL != pResult) + m_pCurrentPosition = pResult->m_pNext; + return pResult; +} + +//static +PTR_Thread ThreadStore::GetSuspendingThread() +{ + return (RhpSuspendingThread); +} + +#ifndef DACCESS_COMPILE + + +ThreadStore::ThreadStore() : + m_ThreadList(), + m_Lock(true /* writers (i.e. 
attaching/detaching threads) should wait on GC event */) +{ + SaveCurrentThreadOffsetForDAC(); +} + +ThreadStore::~ThreadStore() +{ +} + +// static +ThreadStore * ThreadStore::Create(RuntimeInstance * pRuntimeInstance) +{ + NewHolder pNewThreadStore = new (nothrow) ThreadStore(); + if (NULL == pNewThreadStore) + return NULL; + + if (!pNewThreadStore->m_SuspendCompleteEvent.CreateManualEventNoThrow(true)) + return NULL; + + pNewThreadStore->m_pRuntimeInstance = pRuntimeInstance; + + pNewThreadStore.SuppressRelease(); + return pNewThreadStore; +} + +void ThreadStore::Destroy() +{ + delete this; +} + +// static +void ThreadStore::AttachCurrentThread(bool fAcquireThreadStoreLock) +{ + // + // step 1: ThreadStore::InitCurrentThread + // step 2: add this thread to the ThreadStore + // + + // The thread has been constructed, during which some data is initialized (like which RuntimeInstance the + // thread belongs to), but it hasn't been added to the thread store because doing so takes a lock, which + // we want to avoid at construction time because the loader lock is held then. + Thread * pAttachingThread = RawGetCurrentThread(); + + // The thread was already initialized, so it is already attached + if (pAttachingThread->IsInitialized()) + { + return; + } + + PalAttachThread(pAttachingThread); + + // + // Init the thread buffer + // + pAttachingThread->Construct(); + ASSERT(pAttachingThread->m_ThreadStateFlags == Thread::TSF_Unknown); + + // The runtime holds the thread store lock for the duration of thread suspension for GC, so let's check to + // see if that's going on and, if so, use a proper wait instead of the RWL's spinning. NOTE: when we are + // called with fAcquireThreadStoreLock==false, we are being called in a situation where the GC is trying to + // init a GC thread, so we must honor the flag to mean "do not block on GC" or else we will deadlock. + if (fAcquireThreadStoreLock && (RhpTrapThreads != (UInt32)TrapThreadsFlags::None)) + RedhawkGCInterface::WaitForGCCompletion(); + + ThreadStore* pTS = GetThreadStore(); + ReaderWriterLock::WriteHolder write(&pTS->m_Lock, fAcquireThreadStoreLock); + + // + // Set thread state to be attached + // + ASSERT(pAttachingThread->m_ThreadStateFlags == Thread::TSF_Unknown); + pAttachingThread->m_ThreadStateFlags = Thread::TSF_Attached; + + pTS->m_ThreadList.PushHead(pAttachingThread); +} + +// static +void ThreadStore::AttachCurrentThread() +{ + AttachCurrentThread(true); +} + +void ThreadStore::DetachCurrentThread() +{ + // The thread may not have been initialized because it may never have run managed code before. + Thread * pDetachingThread = RawGetCurrentThread(); + + // The thread was not initialized yet, so it was not attached + if (!pDetachingThread->IsInitialized()) + { + return; + } + + if (!PalDetachThread(pDetachingThread)) + { + return; + } + +#ifdef STRESS_LOG + ThreadStressLog * ptsl = reinterpret_cast( + pDetachingThread->GetThreadStressLog()); + StressLog::ThreadDetach(ptsl); +#endif // STRESS_LOG + + ThreadStore* pTS = GetThreadStore(); + ReaderWriterLock::WriteHolder write(&pTS->m_Lock); + ASSERT(rh::std::count(pTS->m_ThreadList.Begin(), pTS->m_ThreadList.End(), pDetachingThread) == 1); + pTS->m_ThreadList.RemoveFirst(pDetachingThread); + pDetachingThread->Destroy(); +} + +// Used by GC to prevent new threads during a GC. New threads must take a write lock to +// modify the list, but they won't be allowed to until all outstanding read locks are +// released. 
This way, the GC always enumerates a consistent set of threads each time +// it enumerates threads between SuspendAllThreads and ResumeAllThreads. +// +// @TODO: Investigate if this requirement is actually necessary. Threads already may +// not enter managed code during GC, so if new threads are added to the thread store, +// but haven't yet entered managed code, is that really a problem? +// +// @TODO: Investigate the suspend/resume algorithm's dependence on this lock's side- +// effect of being a memory barrier. +void ThreadStore::LockThreadStore() +{ + m_Lock.AcquireReadLock(); +} + +void ThreadStore::UnlockThreadStore() +{ + m_Lock.ReleaseReadLock(); +} + +void ThreadStore::SuspendAllThreads(bool waitForGCEvent) +{ + ThreadStore::SuspendAllThreads(waitForGCEvent, /* fireDebugEvent = */ true); +} + +void ThreadStore::SuspendAllThreads(bool waitForGCEvent, bool fireDebugEvent) +{ + // + // SuspendAllThreads requires all threads running + // + // Threads are by default frozen by the debugger during FuncEval + // Therefore, in case of FuncEval, we need to inform the debugger + // to unfreeze the threads. + // + if (fireDebugEvent && DebugFuncEval::GetMostRecentFuncEvalHijackInstructionPointer() != 0) + { + struct DebuggerFuncEvalCrossThreadDependencyNotification crossThreadDependencyEventPayload; + crossThreadDependencyEventPayload.kind = DebuggerResponseKind::FuncEvalCrossThreadDependency; + crossThreadDependencyEventPayload.payload = 0; + DebugEventSource::SendCustomEvent(&crossThreadDependencyEventPayload, sizeof(struct DebuggerFuncEvalCrossThreadDependencyNotification)); + } + + Thread * pThisThread = GetCurrentThreadIfAvailable(); + + LockThreadStore(); + + RhpSuspendingThread = pThisThread; + + if (waitForGCEvent) + { + GCHeapUtilities::GetGCHeap()->ResetWaitForGCEvent(); + } + m_SuspendCompleteEvent.Reset(); + + // set the global trap for pinvoke leave and return + RhpTrapThreads |= (UInt32)TrapThreadsFlags::TrapThreads; + + // Set each module's loop hijack flag + GetRuntimeInstance()->SetLoopHijackFlags(RhpTrapThreads); + + // Our lock-free algorithm depends on flushing write buffers of all processors running RH code. The + // reason for this is that we essentially implement Dekker's algorithm, which requires write ordering. + PalFlushProcessWriteBuffers(); + + bool keepWaiting; + YieldProcessorNormalizationInfo normalizationInfo; + do + { + keepWaiting = false; + FOREACH_THREAD(pTargetThread) + { + if (pTargetThread == pThisThread) + continue; + + if (!pTargetThread->CacheTransitionFrameForSuspend()) + { + // We drive all threads to preemptive mode by hijacking them with both a + // return-address hijack and loop hijacks. + keepWaiting = true; + pTargetThread->Hijack(); + } + else if (pTargetThread->DangerousCrossThreadIsHijacked()) + { + // Once a thread is safely in preemptive mode, we must wait until it is also + // unhijacked. This is done because, otherwise, we might race on into the + // stackwalk and find the hijack still on the stack, which will cause the + // stackwalking code to crash. + keepWaiting = true; + } + } + END_FOREACH_THREAD + + if (keepWaiting) + { + if (PalSwitchToThread() == 0 && g_RhSystemInfo.dwNumberOfProcessors > 1) + { + // No threads are scheduled on this processor. Perhaps we're waiting for a thread + // that's scheduled on another processor. If so, let's give it a little time + // to make forward progress. + // Note that we do not call Sleep, because the minimum granularity of Sleep is much + // too long (we probably don't need a 15ms wait here). 
Instead, we'll just burn some + // cycles. + // @TODO: need tuning for spin + YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, 10000); + } + } + + } while (keepWaiting); + + m_SuspendCompleteEvent.Set(); +} + +void ThreadStore::ResumeAllThreads(bool waitForGCEvent) +{ + FOREACH_THREAD(pTargetThread) + { + pTargetThread->ResetCachedTransitionFrame(); + } + END_FOREACH_THREAD + + RhpTrapThreads &= ~(UInt32)TrapThreadsFlags::TrapThreads; + + // Reset module's hijackLoops flag + GetRuntimeInstance()->SetLoopHijackFlags(0); + + RhpSuspendingThread = NULL; + if (waitForGCEvent) + { + GCHeapUtilities::GetGCHeap()->SetWaitForGCEvent(); + } + UnlockThreadStore(); +} // ResumeAllThreads + +void ThreadStore::WaitForSuspendComplete() +{ + UInt32 waitResult = m_SuspendCompleteEvent.Wait(INFINITE, false); + if (waitResult == WAIT_FAILED) + RhFailFast(); +} + +#ifndef DACCESS_COMPILE + +void ThreadStore::InitiateThreadAbort(Thread* targetThread, Object * threadAbortException, bool doRudeAbort) +{ + SuspendAllThreads(/* waitForGCEvent = */ false, /* fireDebugEvent = */ false); + // TODO: consider enabling multiple thread aborts running in parallel on different threads + ASSERT((RhpTrapThreads & (UInt32)TrapThreadsFlags::AbortInProgress) == 0); + RhpTrapThreads |= (UInt32)TrapThreadsFlags::AbortInProgress; + + targetThread->SetThreadAbortException(threadAbortException); + + // TODO: Stage 2: Queue APC to the target thread to break out of possible wait + + bool initiateAbort = false; + + if (!doRudeAbort) + { + // TODO: Stage 3: protected regions (finally, catch) handling + // If it was in a protected region, set the "throw at protected region end" flag on the native Thread object + // TODO: Stage 4: reverse PInvoke handling + // If there was a reverse Pinvoke frame between the current frame and the funceval frame of the target thread, + // find the outermost reverse Pinvoke frame below the funceval frame and set the thread abort flag in its transition frame. + // If both of these cases happened at once, find out which one of the outermost frame of the protected region + // and the outermost reverse Pinvoke frame is closer to the funceval frame and perform one of the two actions + // described above based on the one that's closer. 
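+ // Until the staged work described above is implemented, the rude and non-rude paths collapse
+ // to the same action: tag the target thread's most recent transition frame with
+ // PTFF_THREAD_ABORT (below) while every other thread is suspended.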
+ initiateAbort = true; + } + else + { + initiateAbort = true; + } + + if (initiateAbort) + { + PInvokeTransitionFrame* transitionFrame = reinterpret_cast(targetThread->GetTransitionFrame()); + transitionFrame->m_Flags |= PTFF_THREAD_ABORT; + } + + ResumeAllThreads(/* waitForGCEvent = */ false); +} + +void ThreadStore::CancelThreadAbort(Thread* targetThread) +{ + SuspendAllThreads(/* waitForGCEvent = */ false, /* fireDebugEvent = */ false); + + ASSERT((RhpTrapThreads & (UInt32)TrapThreadsFlags::AbortInProgress) != 0); + RhpTrapThreads &= ~(UInt32)TrapThreadsFlags::AbortInProgress; + + PInvokeTransitionFrame* transitionFrame = reinterpret_cast(targetThread->GetTransitionFrame()); + if (transitionFrame != nullptr) + { + transitionFrame->m_Flags &= ~PTFF_THREAD_ABORT; + } + + targetThread->SetThreadAbortException(nullptr); + + ResumeAllThreads(/* waitForGCEvent = */ false); +} + +COOP_PINVOKE_HELPER(void *, RhpGetCurrentThread, ()) +{ + return ThreadStore::GetCurrentThread(); +} + +COOP_PINVOKE_HELPER(void, RhpInitiateThreadAbort, (void* thread, Object * threadAbortException, Boolean doRudeAbort)) +{ + GetThreadStore()->InitiateThreadAbort((Thread*)thread, threadAbortException, doRudeAbort); +} + +COOP_PINVOKE_HELPER(void, RhpCancelThreadAbort, (void* thread)) +{ + GetThreadStore()->CancelThreadAbort((Thread*)thread); +} + +#endif // DACCESS_COMPILE + +C_ASSERT(sizeof(Thread) == sizeof(ThreadBuffer)); + +EXTERN_C DECLSPEC_THREAD ThreadBuffer tls_CurrentThread = +{ + { 0 }, // m_rgbAllocContextBuffer + Thread::TSF_Unknown, // m_ThreadStateFlags + TOP_OF_STACK_MARKER, // m_pTransitionFrame + TOP_OF_STACK_MARKER, // m_pHackPInvokeTunnel + 0, // m_pCachedTransitionFrame + 0, // m_pNext + INVALID_HANDLE_VALUE, // m_hPalThread + 0, // m_ppvHijackedReturnAddressLocation + 0, // m_pvHijackedReturnAddress + 0, // all other fields are initialized by zeroes +}; + +#endif // !DACCESS_COMPILE + +#ifdef _WIN32 + +#ifndef DACCESS_COMPILE + +// Keep a global variable in the target process which contains +// the address of _tls_index. This is the breadcrumb needed +// by DAC to read _tls_index since we don't control the +// declaration of _tls_index directly. 
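+// Given a target thread's TEB, the DAC then recovers the Thread as (see GetThreadFromTEB below):
+//
+//     pTls    = *(TADDR*)(pTEB + OFFSETOF__TEB__ThreadLocalStoragePointer)
+//     pOurTls = *(TADDR*)(pTls + (*p_tls_index) * sizeof(void*))
+//     pThread = (Thread*)(pOurTls + SECTIONREL__tls_CurrentThread)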
+ +// volatile to prevent the compiler from removing the unused global variable +volatile UInt32 * p_tls_index; +volatile UInt32 SECTIONREL__tls_CurrentThread; + +EXTERN_C UInt32 _tls_index; +#if defined(TARGET_ARM64) +// ARM64TODO: Re-enable optimization +#pragma optimize("", off) +#endif +void ThreadStore::SaveCurrentThreadOffsetForDAC() +{ + p_tls_index = &_tls_index; + + UInt8 * pTls = *(UInt8 **)(PalNtCurrentTeb() + OFFSETOF__TEB__ThreadLocalStoragePointer); + + UInt8 * pOurTls = *(UInt8 **)(pTls + (_tls_index * sizeof(void*))); + + SECTIONREL__tls_CurrentThread = (UInt32)((UInt8 *)&tls_CurrentThread - pOurTls); +} +#if defined(TARGET_ARM64) +#pragma optimize("", on) +#endif +#else // DACCESS_COMPILE + +GPTR_IMPL(UInt32, p_tls_index); +GVAL_IMPL(UInt32, SECTIONREL__tls_CurrentThread); + +// +// This routine supports the !Thread debugger extension routine +// +typedef DPTR(TADDR) PTR_TADDR; +// static +PTR_Thread ThreadStore::GetThreadFromTEB(TADDR pTEB) +{ + if (pTEB == NULL) + return NULL; + + UInt32 tlsIndex = *p_tls_index; + TADDR pTls = *(PTR_TADDR)(pTEB + OFFSETOF__TEB__ThreadLocalStoragePointer); + if (pTls == NULL) + return NULL; + + TADDR pOurTls = *(PTR_TADDR)(pTls + (tlsIndex * sizeof(void*))); + if (pOurTls == NULL) + return NULL; + + return (PTR_Thread)(pOurTls + SECTIONREL__tls_CurrentThread); +} + +#endif // DACCESS_COMPILE + +#else // _WIN32 + +void ThreadStore::SaveCurrentThreadOffsetForDAC() +{ +} + +#endif // _WIN32 + + +#ifndef DACCESS_COMPILE + +// internal static extern unsafe bool RhGetExceptionsForCurrentThread(Exception[] outputArray, out int writtenCountOut); +COOP_PINVOKE_HELPER(Boolean, RhGetExceptionsForCurrentThread, (Array* pOutputArray, Int32* pWrittenCountOut)) +{ + return GetThreadStore()->GetExceptionsForCurrentThread(pOutputArray, pWrittenCountOut); +} + +Boolean ThreadStore::GetExceptionsForCurrentThread(Array* pOutputArray, Int32* pWrittenCountOut) +{ + Int32 countWritten = 0; + Object** pArrayElements; + Thread * pThread = GetCurrentThread(); + + for (PTR_ExInfo pInfo = pThread->m_pExInfoStackHead; pInfo != NULL; pInfo = pInfo->m_pPrevExInfo) + { + if (pInfo->m_exception == NULL) + continue; + + countWritten++; + } + + // No input array provided, or it was of the wrong kind. We'll fill out the count and return false. + if ((pOutputArray == NULL) || (pOutputArray->get_EEType()->get_ComponentSize() != POINTER_SIZE)) + goto Error; + + // Input array was not big enough. We don't even partially fill it. + if (pOutputArray->GetArrayLength() < (UInt32)countWritten) + goto Error; + + *pWrittenCountOut = countWritten; + + // Success, but nothing to report. + if (countWritten == 0) + return Boolean_true; + + pArrayElements = (Object**)pOutputArray->GetArrayData(); + for (PTR_ExInfo pInfo = pThread->m_pExInfoStackHead; pInfo != NULL; pInfo = pInfo->m_pPrevExInfo) + { + if (pInfo->m_exception == NULL) + continue; + + *pArrayElements = pInfo->m_exception; + pArrayElements++; + } + + RhpBulkWriteBarrier(pArrayElements, countWritten * POINTER_SIZE); + return Boolean_true; + +Error: + *pWrittenCountOut = countWritten; + return Boolean_false; +} +#endif // DACCESS_COMPILE diff --git a/src/coreclr/src/nativeaot/Runtime/threadstore.h b/src/coreclr/src/nativeaot/Runtime/threadstore.h new file mode 100644 index 0000000000000..dbc113a303f20 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/threadstore.h @@ -0,0 +1,80 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+class Thread; +class CLREventStatic; +class RuntimeInstance; +class Array; +typedef DPTR(RuntimeInstance) PTR_RuntimeInstance; + +enum class TrapThreadsFlags +{ + None = 0, + AbortInProgress = 1, + TrapThreads = 2 +}; + +class ThreadStore +{ + SList m_ThreadList; + PTR_RuntimeInstance m_pRuntimeInstance; + CLREventStatic m_SuspendCompleteEvent; + ReaderWriterLock m_Lock; + +private: + ThreadStore(); + + void LockThreadStore(); + void UnlockThreadStore(); + void SuspendAllThreads(bool waitForGCEvent, bool fireDebugEvent); + +public: + class Iterator + { + ReaderWriterLock::ReadHolder m_readHolder; + PTR_Thread m_pCurrentPosition; + public: + Iterator(); + ~Iterator(); + PTR_Thread GetNext(); + }; + + ~ThreadStore(); + static ThreadStore * Create(RuntimeInstance * pRuntimeInstance); + static Thread * RawGetCurrentThread(); + static Thread * GetCurrentThread(); + static Thread * GetCurrentThreadIfAvailable(); + static PTR_Thread GetSuspendingThread(); + static void AttachCurrentThread(); + static void AttachCurrentThread(bool fAcquireThreadStoreLock); + static void DetachCurrentThread(); +#ifndef DACCESS_COMPILE + static void SaveCurrentThreadOffsetForDAC(); + void InitiateThreadAbort(Thread* targetThread, Object * threadAbortException, bool doRudeAbort); + void CancelThreadAbort(Thread* targetThread); +#else + static PTR_Thread GetThreadFromTEB(TADDR pvTEB); +#endif + Boolean GetExceptionsForCurrentThread(Array* pOutputArray, Int32* pWrittenCountOut); + + void Destroy(); + void SuspendAllThreads(bool waitForGCEvent); + void ResumeAllThreads(bool waitForGCEvent); + + static bool IsTrapThreadsRequested(); + void WaitForSuspendComplete(); +}; +typedef DPTR(ThreadStore) PTR_ThreadStore; + +ThreadStore * GetThreadStore(); + +#define FOREACH_THREAD(p_thread_name) \ +{ \ + ThreadStore::Iterator __threads; \ + Thread * p_thread_name; \ + while ((p_thread_name = __threads.GetNext()) != NULL) \ + { \ + +#define END_FOREACH_THREAD \ + } \ +} \ + diff --git a/src/coreclr/src/nativeaot/Runtime/threadstore.inl b/src/coreclr/src/nativeaot/Runtime/threadstore.inl new file mode 100644 index 0000000000000..811d1e91e2541 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/threadstore.inl @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +EXTERN_C DECLSPEC_THREAD ThreadBuffer tls_CurrentThread; + +// static +inline Thread * ThreadStore::RawGetCurrentThread() +{ + return (Thread *) &tls_CurrentThread; +} + +// static +inline Thread * ThreadStore::GetCurrentThread() +{ + Thread * pCurThread = RawGetCurrentThread(); + + // If this assert fires, and you only need the Thread pointer if the thread has ever previously + // entered the runtime, then you should be using GetCurrentThreadIfAvailable instead. 
+ ASSERT(pCurThread->IsInitialized()); + return pCurThread; +} + +// static +inline Thread * ThreadStore::GetCurrentThreadIfAvailable() +{ + Thread * pCurThread = RawGetCurrentThread(); + if (pCurThread->IsInitialized()) + return pCurThread; + + return NULL; +} + +EXTERN_C volatile UInt32 RhpTrapThreads; + +// static +inline bool ThreadStore::IsTrapThreadsRequested() +{ + return (RhpTrapThreads & (UInt32)TrapThreadsFlags::TrapThreads) != 0; +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/AsmOffsets.cpp b/src/coreclr/src/nativeaot/Runtime/unix/AsmOffsets.cpp new file mode 100644 index 0000000000000..1dd26302cde49 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/AsmOffsets.cpp @@ -0,0 +1,9 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#define HASH_DEFINE #define +#define PLAT_ASM_OFFSET(offset, cls, member) HASH_DEFINE OFFSETOF__##cls##__##member 0x##offset +#define PLAT_ASM_SIZEOF(size, cls ) HASH_DEFINE SIZEOF__##cls 0x##size +#define PLAT_ASM_CONST(constant, expr) HASH_DEFINE expr 0x##constant + +#include diff --git a/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.cpp b/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.cpp new file mode 100644 index 0000000000000..7374a2b477087 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.cpp @@ -0,0 +1,647 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "CommonTypes.h" +#include "PalRedhawkCommon.h" +#include "CommonMacros.h" +#include "config.h" +#include "daccess.h" +#include "regdisplay.h" +#include "UnixContext.h" + +#include +#include "HardwareExceptions.h" + +#if !HAVE_SIGINFO_T +#error Cannot handle hardware exceptions on this platform +#endif + +#define REDHAWK_PALEXPORT extern "C" +#define REDHAWK_PALAPI + +#define EXCEPTION_ACCESS_VIOLATION 0xC0000005u +#define EXCEPTION_DATATYPE_MISALIGNMENT 0x80000002u +#define EXCEPTION_BREAKPOINT 0x80000003u +#define EXCEPTION_SINGLE_STEP 0x80000004u +#define EXCEPTION_ARRAY_BOUNDS_EXCEEDED 0xC000008Cu +#define EXCEPTION_FLT_DENORMAL_OPERAND 0xC000008Du +#define EXCEPTION_FLT_DIVIDE_BY_ZERO 0xC000008Eu +#define EXCEPTION_FLT_INEXACT_RESULT 0xC000008Fu +#define EXCEPTION_FLT_INVALID_OPERATION 0xC0000090u +#define EXCEPTION_FLT_OVERFLOW 0xC0000091u +#define EXCEPTION_FLT_STACK_CHECK 0xC0000092u +#define EXCEPTION_FLT_UNDERFLOW 0xC0000093u +#define EXCEPTION_INT_DIVIDE_BY_ZERO 0xC0000094u +#define EXCEPTION_INT_OVERFLOW 0xC0000095u +#define EXCEPTION_PRIV_INSTRUCTION 0xC0000096u +#define EXCEPTION_IN_PAGE_ERROR 0xC0000006u +#define EXCEPTION_ILLEGAL_INSTRUCTION 0xC000001Du +#define EXCEPTION_NONCONTINUABLE_EXCEPTION 0xC0000025u +#define EXCEPTION_STACK_OVERFLOW 0xC00000FDu +#define EXCEPTION_INVALID_DISPOSITION 0xC0000026u +#define EXCEPTION_GUARD_PAGE 0x80000001u +#define EXCEPTION_INVALID_HANDLE 0xC0000008u + +#define EXCEPTION_CONTINUE_EXECUTION (-1) +#define EXCEPTION_CONTINUE_SEARCH (0) +#define EXCEPTION_EXECUTE_HANDLER (1) + +struct sigaction g_previousSIGSEGV; +struct sigaction g_previousSIGFPE; + +typedef void (*SignalHandler)(int code, siginfo_t *siginfo, void *context); + +// Exception handler for hardware exceptions +static PHARDWARE_EXCEPTION_HANDLER g_hardwareExceptionHandler = NULL; + +#ifdef HOST_AMD64 + +// Get value of an instruction operand represented by the ModR/M field +// Parameters: +// uint8_t rex : REX prefix, 0 if 
there was none +// uint8_t* ip : instruction pointer pointing to the ModR/M field +// void* context : context containing the registers +// bool is8Bit : true if the operand size is 8 bit +// bool hasOpSizePrefix : true if the instruction has op size prefix (0x66) +uint64_t GetModRMOperandValue(uint8_t rex, uint8_t* ip, void* context, bool is8Bit, bool hasOpSizePrefix) +{ + uint64_t result; + uint64_t resultReg; + + uint8_t rex_b = (rex & 0x1); // high bit to modrm r/m field or SIB base field + uint8_t rex_x = (rex & 0x2) >> 1; // high bit to sib index field + uint8_t rex_r = (rex & 0x4) >> 2; // high bit to modrm reg field + uint8_t rex_w = (rex & 0x8) >> 3; // 1 = 64 bit operand size, 0 = operand size determined by hasOpSizePrefix + + uint8_t modrm = *ip++; + + ASSERT(modrm != 0); + + uint8_t mod = (modrm & 0xC0) >> 6; + uint8_t reg = (modrm & 0x38) >> 3; + uint8_t rm = (modrm & 0x07); + + reg |= (rex_r << 3); + uint8_t rmIndex = rm | (rex_b << 3); + + // 8 bit idiv without the REX prefix uses registers AH, CH, DH, BH for rm 4..8 + // which is an exception from the regular register indexes. + bool isAhChDhBh = is8Bit && (rex == 0) && (rm >= 4); + + // See: Tables A-15,16,17 in AMD Dev Manual 3 for information + // about how the ModRM/SIB/REX uint8_ts interact. + + switch (mod) + { + case 0: + case 1: + case 2: + if (rm == 4) // we have an SIB uint8_t following + { + // + // Get values from the SIB uint8_t + // + uint8_t sib = *ip++; + + ASSERT(sib != 0); + + uint8_t ss = (sib & 0xC0) >> 6; + uint8_t index = (sib & 0x38) >> 3; + uint8_t base = (sib & 0x07); + + index |= (rex_x << 3); + base |= (rex_b << 3); + + // + // Get starting value + // + if ((mod == 0) && (base == 5)) + { + result = 0; + } + else + { + result = GetRegisterValueByIndex(context, base); + } + + // + // Add in the [index] + // + if (index != 4) + { + result += GetRegisterValueByIndex(context, index) << ss; + } + + // + // Finally add in the offset + // + if (mod == 0) + { + if (base == 5) + { + result += *((int32_t*)ip); + } + } + else if (mod == 1) + { + result += *((int8_t*)ip); + } + else // mod == 2 + { + result += *((int32_t*)ip); + } + + } + else + { + // + // Get the value we need from the register. + // + + // Check for RIP-relative addressing mode. + if ((mod == 0) && (rm == 5)) + { + result = (uint64_t)ip + sizeof(int32_t) + *(int32_t*)ip; + } + else + { + result = GetRegisterValueByIndex(context, rmIndex); + + if (mod == 1) + { + result += *((int8_t*)ip); + } + else if (mod == 2) + { + result += *((int32_t*)ip); + } + } + } + + break; + + case 3: + default: + // The operand is stored in a register. + if (isAhChDhBh) + { + // 8 bit idiv without the REX prefix uses registers AH, CH, DH or BH for rm 4..8. + // So we shift the register index to get the real register index. + rmIndex -= 4; + } + + resultReg = GetRegisterValueByIndex(context, rmIndex); + result = (uint64_t)&resultReg; + + if (isAhChDhBh) + { + // Move one uint8_t higher to get an address of the AH, CH, DH or BH + result++; + } + + break; + + } + + // Now dereference thru the result to get the resulting value. + if (is8Bit) + { + result = *((uint8_t*)result); + } + else if (rex_w != 0) + { + result = *((uint64_t*)result); + } + else if (hasOpSizePrefix) + { + result = *((uint16_t*)result); + } + else + { + result = *((uint32_t*)result); + } + + return result; +} + +// Skip all prefixes until the instruction code or the REX prefix is found +// Parameters: +// uint8_t** ip : Pointer to the current instruction pointer. 
Updated +// as the function walks the codes. +// bool* hasOpSizePrefix : Pointer to bool, on exit set to true if a op size prefix +// was found. +// Return value : +// Code of the REX prefix or the instruction code after the prefixes. +uint8_t SkipPrefixes(uint8_t **ip, bool* hasOpSizePrefix) +{ + *hasOpSizePrefix = false; + + while (true) + { + uint8_t code = *(*ip)++; + + switch (code) + { + case 0x66: // Operand-Size + *hasOpSizePrefix = true; + break; + + // Segment overrides + case 0x26: // ES + case 0x2E: // CS + case 0x36: // SS + case 0x3E: // DS + case 0x64: // FS + case 0x65: // GS + + // Size overrides + case 0x67: // Address-Size + + // Lock + case 0xf0: + + // String REP prefixes + case 0xf2: // REPNE/REPNZ + case 0xf3: + break; + + default: + // Return address of the nonprefix code + return code; + } + } +} + +// Check if a division by zero exception is in fact a division overflow. The +// x64 processor generate the same exception in both cases for the IDIV / DIV +// instruction. So we need to decode the instruction argument and check +// whether it was zero or not. +bool IsDivByZeroAnIntegerOverflow(void* context) +{ + uint8_t * ip = (uint8_t*)GetPC(context); + uint8_t rex = 0; + bool hasOpSizePrefix = false; + + uint8_t code = SkipPrefixes(&ip, &hasOpSizePrefix); + + // The REX prefix must directly preceed the instruction code + if ((code & 0xF0) == 0x40) + { + rex = code; + code = *ip++; + } + + uint64_t divisor = 0; + + // Check if the instruction is IDIV or DIV. The instruction code includes the three + // 'reg' bits in the ModRM uint8_t. These are 7 for IDIV and 6 for DIV + uint8_t regBits = (*ip & 0x38) >> 3; + if ((code == 0xF7 || code == 0xF6) && (regBits == 7 || regBits == 6)) + { + bool is8Bit = (code == 0xF6); + divisor = GetModRMOperandValue(rex, ip, context, is8Bit, hasOpSizePrefix); + } + else + { + ASSERT_UNCONDITIONALLY("Invalid instruction (expected IDIV or DIV)"); + } + + // If the division operand is zero, it was division by zero. Otherwise the failure + // must have been an overflow. + return divisor != 0; +} +#endif //HOST_AMD64 + +// Translates signal and context information to a Win32 exception code. +uint32_t GetExceptionCodeForSignal(const siginfo_t *siginfo, const void *context) +{ + // IMPORTANT NOTE: This function must not call any signal unsafe functions + // since it is called from signal handlers. 
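+ // When the ILL_* si_code constants are available (the common Linux/macOS case),
+ // the mapping is driven purely by si_signo and si_code; otherwise the BSD-style
+ // fallback below maps SIGFPE by si_code and everything else by the trap number
+ // in uc_mcontext.mc_trapno. For example, a SIGFPE delivered with si_code equal
+ // to FPE_INTDIV is reported to the runtime as EXCEPTION_INT_DIVIDE_BY_ZERO (0xC0000094).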
+#ifdef ILL_ILLOPC + switch (siginfo->si_signo) + { + case SIGILL: + switch (siginfo->si_code) + { + case ILL_ILLOPC: // Illegal opcode + case ILL_ILLOPN: // Illegal operand + case ILL_ILLADR: // Illegal addressing mode + case ILL_ILLTRP: // Illegal trap + case ILL_COPROC: // Co-processor error + return EXCEPTION_ILLEGAL_INSTRUCTION; + case ILL_PRVOPC: // Privileged opcode + case ILL_PRVREG: // Privileged register + return EXCEPTION_PRIV_INSTRUCTION; + case ILL_BADSTK: // Internal stack error + return EXCEPTION_STACK_OVERFLOW; + default: + break; + } + break; + case SIGFPE: + switch (siginfo->si_code) + { + case FPE_INTDIV: + return EXCEPTION_INT_DIVIDE_BY_ZERO; + case FPE_INTOVF: + return EXCEPTION_INT_OVERFLOW; + case FPE_FLTDIV: + return EXCEPTION_FLT_DIVIDE_BY_ZERO; + case FPE_FLTOVF: + return EXCEPTION_FLT_OVERFLOW; + case FPE_FLTUND: + return EXCEPTION_FLT_UNDERFLOW; + case FPE_FLTRES: + return EXCEPTION_FLT_INEXACT_RESULT; + case FPE_FLTINV: + return EXCEPTION_FLT_INVALID_OPERATION; + case FPE_FLTSUB: + return EXCEPTION_FLT_INVALID_OPERATION; + default: + break; + } + break; + case SIGSEGV: + switch (siginfo->si_code) + { + case SI_USER: // User-generated signal, sometimes sent + // for SIGSEGV under normal circumstances + case SEGV_MAPERR: // Address not mapped to object + case SEGV_ACCERR: // Invalid permissions for mapped object + return EXCEPTION_ACCESS_VIOLATION; + +#ifdef SI_KERNEL + case SI_KERNEL: + { + return EXCEPTION_ACCESS_VIOLATION; + } +#endif + default: + break; + } + break; + case SIGBUS: + switch (siginfo->si_code) + { + case BUS_ADRALN: // Invalid address alignment + return EXCEPTION_DATATYPE_MISALIGNMENT; + case BUS_ADRERR: // Non-existent physical address + return EXCEPTION_ACCESS_VIOLATION; + case BUS_OBJERR: // Object-specific hardware error + default: + break; + } + case SIGTRAP: + switch (siginfo->si_code) + { +#ifdef SI_KERNEL + case SI_KERNEL: +#endif + case SI_USER: + case TRAP_BRKPT: // Process breakpoint + return EXCEPTION_BREAKPOINT; + case TRAP_TRACE: // Process trace trap + return EXCEPTION_SINGLE_STEP; + default: + // Got unknown SIGTRAP signal with code siginfo->si_code; + return EXCEPTION_ILLEGAL_INSTRUCTION; + } + default: + break; + } + + // Got unknown signal number siginfo->si_signo with code siginfo->si_code; + return EXCEPTION_ILLEGAL_INSTRUCTION; +#else // ILL_ILLOPC + int trap; + + if (siginfo->si_signo == SIGFPE) + { + // Floating point exceptions are mapped by their si_code. + switch (siginfo->si_code) + { + case FPE_INTDIV : + return EXCEPTION_INT_DIVIDE_BY_ZERO; + case FPE_INTOVF : + return EXCEPTION_INT_OVERFLOW; + case FPE_FLTDIV : + return EXCEPTION_FLT_DIVIDE_BY_ZERO; + case FPE_FLTOVF : + return EXCEPTION_FLT_OVERFLOW; + case FPE_FLTUND : + return EXCEPTION_FLT_UNDERFLOW; + case FPE_FLTRES : + return EXCEPTION_FLT_INEXACT_RESULT; + case FPE_FLTINV : + return EXCEPTION_FLT_INVALID_OPERATION; + case FPE_FLTSUB :/* subscript out of range */ + return EXCEPTION_FLT_INVALID_OPERATION; + default: + // Got unknown signal code siginfo->si_code; + return 0; + } + } + + trap = ((ucontext_t*)context)->uc_mcontext.mc_trapno; + switch (trap) + { + case T_PRIVINFLT : /* privileged instruction */ + return EXCEPTION_PRIV_INSTRUCTION; + case T_BPTFLT : /* breakpoint instruction */ + return EXCEPTION_BREAKPOINT; + case T_ARITHTRAP : /* arithmetic trap */ + return 0; /* let the caller pick an exception code */ +#ifdef T_ASTFLT + case T_ASTFLT : /* system forced exception : ^C, ^\. 
SIGINT signal + handler shouldn't be calling this function, since + it doesn't need an exception code */ + // Trap code T_ASTFLT received, shouldn't get here; + return 0; +#endif // T_ASTFLT + case T_PROTFLT : /* protection fault */ + return EXCEPTION_ACCESS_VIOLATION; + case T_TRCTRAP : /* debug exception (sic) */ + return EXCEPTION_SINGLE_STEP; + case T_PAGEFLT : /* page fault */ + return EXCEPTION_ACCESS_VIOLATION; + case T_ALIGNFLT : /* alignment fault */ + return EXCEPTION_DATATYPE_MISALIGNMENT; + case T_DIVIDE : + return EXCEPTION_INT_DIVIDE_BY_ZERO; + case T_NMI : /* non-maskable trap */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_OFLOW : + return EXCEPTION_INT_OVERFLOW; + case T_BOUND : /* bound instruction fault */ + return EXCEPTION_ARRAY_BOUNDS_EXCEEDED; + case T_DNA : /* device not available fault */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_DOUBLEFLT : /* double fault */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_FPOPFLT : /* fp coprocessor operand fetch fault */ + return EXCEPTION_FLT_INVALID_OPERATION; + case T_TSSFLT : /* invalid tss fault */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_SEGNPFLT : /* segment not present fault */ + return EXCEPTION_ACCESS_VIOLATION; + case T_STKFLT : /* stack fault */ + return EXCEPTION_STACK_OVERFLOW; + case T_MCHK : /* machine check trap */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_RESERVED : /* reserved (unknown) */ + return EXCEPTION_ILLEGAL_INSTRUCTION; + default: + // Got unknown trap code trap; + break; + } + return EXCEPTION_ILLEGAL_INSTRUCTION; +#endif // ILL_ILLOPC +} + +// Common handler for hardware exception signals +bool HardwareExceptionHandler(int code, siginfo_t *siginfo, void *context, void* faultAddress) +{ + if (g_hardwareExceptionHandler != NULL) + { + UIntNative faultCode = GetExceptionCodeForSignal(siginfo, context); + +#ifdef HOST_AMD64 + // It is possible that an overflow was mapped to a divide-by-zero exception. + // This happens when we try to divide the maximum negative value of a + // signed integer with -1. + // + // Thus, we will attempt to decode the instruction @ RIP to determine if that + // is the case using the faulting context. + if ((faultCode == EXCEPTION_INT_DIVIDE_BY_ZERO) && IsDivByZeroAnIntegerOverflow(context)) + { + // The exception was an integer overflow, so augment the fault code. 
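+ // (On x86-64 the IDIV instruction raises the same #DE fault both for a zero
+ // divisor and for the non-representable quotient INT64_MIN / -1, and the kernel
+ // reports both as FPE_INTDIV, so decoding the divisor is the only way to tell
+ // the two cases apart.)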
+ faultCode = EXCEPTION_INT_OVERFLOW; + } +#endif //HOST_AMD64 + + PAL_LIMITED_CONTEXT palContext; + NativeContextToPalContext(context, &palContext); + + UIntNative arg0Reg; + UIntNative arg1Reg; + Int32 disposition = g_hardwareExceptionHandler(faultCode, (UIntNative)faultAddress, &palContext, &arg0Reg, &arg1Reg); + if (disposition == EXCEPTION_CONTINUE_EXECUTION) + { + // TODO: better name + RedirectNativeContext(context, &palContext, arg0Reg, arg1Reg); + return true; + } + } + + return false; +} + +// Add handler for hardware exception signal +bool AddSignalHandler(int signal, SignalHandler handler, struct sigaction* previousAction) +{ + struct sigaction newAction; + + newAction.sa_flags = SA_RESTART; + newAction.sa_handler = NULL; + newAction.sa_sigaction = handler; + newAction.sa_flags |= SA_SIGINFO; + + sigemptyset(&newAction.sa_mask); + + if (sigaction(signal, NULL, previousAction) == -1) + { + ASSERT_UNCONDITIONALLY("Failed to get previous signal handler"); + return false; + } + + if (previousAction->sa_flags & SA_ONSTACK) + { + // If the previous signal handler uses an alternate stack, we need to use it too + // so that when we chain-call the previous handler, it is called on the kind of + // stack it expects. + // We also copy the signal mask to make sure that if some signals were blocked + // from execution on the alternate stack by the previous action, we honor that. + newAction.sa_flags |= SA_ONSTACK; + newAction.sa_mask = previousAction->sa_mask; + } + + if (sigaction(signal, &newAction, previousAction) == -1) + { + ASSERT_UNCONDITIONALLY("Failed to install signal handler"); + return false; + } + + return true; +} + +// Restore original handler for hardware exception signal +void RestoreSignalHandler(int signal_id, struct sigaction *previousAction) +{ + if (-1 == sigaction(signal_id, previousAction, NULL)) + { + ASSERT_UNCONDITIONALLY("RestoreSignalHandler: sigaction() call failed"); + } +} + +// Handler for the SIGSEGV signal +void SIGSEGVHandler(int code, siginfo_t *siginfo, void *context) +{ + bool isHandled = HardwareExceptionHandler(code, siginfo, context, siginfo->si_addr); + if (isHandled) + { + return; + } + + if (g_previousSIGSEGV.sa_sigaction != NULL) + { + g_previousSIGSEGV.sa_sigaction(code, siginfo, context); + } + else + { + // Restore the original or default handler and restart h/w exception + RestoreSignalHandler(code, &g_previousSIGSEGV); + } +} + +// Handler for the SIGFPE signal +void SIGFPEHandler(int code, siginfo_t *siginfo, void *context) +{ + bool isHandled = HardwareExceptionHandler(code, siginfo, context, NULL); + if (isHandled) + { + return; + } + + if (g_previousSIGFPE.sa_sigaction != NULL) + { + g_previousSIGFPE.sa_sigaction(code, siginfo, context); + } + else + { + // Restore the original or default handler and restart h/w exception + RestoreSignalHandler(code, &g_previousSIGFPE); + } +} + +// Initialize hardware exception handling +bool InitializeHardwareExceptionHandling() +{ + if (!AddSignalHandler(SIGSEGV, SIGSEGVHandler, &g_previousSIGSEGV)) + { + return false; + } + + if (!AddSignalHandler(SIGFPE, SIGFPEHandler, &g_previousSIGFPE)) + { + return false; + } + + return true; +} + +// Set CoreRT hardware exception handler +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalSetHardwareExceptionHandler(PHARDWARE_EXCEPTION_HANDLER handler) +{ + ASSERT_MSG(g_hardwareExceptionHandler == NULL, "Hardware exception handler already set") + g_hardwareExceptionHandler = handler; +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.h 
b/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.h new file mode 100644 index 0000000000000..8a4a18af1a64d --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/HardwareExceptions.h @@ -0,0 +1,10 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __HARDWARE_EXCEPTIONS_H__ +#define __HARDWARE_EXCEPTIONS_H__ + +// Initialize hardware exception handling +bool InitializeHardwareExceptionHandling(); + +#endif // __HARDWARE_EXCEPTIONS_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkInline.h b/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkInline.h new file mode 100644 index 0000000000000..bbc0e2b5cbde5 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkInline.h @@ -0,0 +1,102 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Implementation of Redhawk PAL inline functions + +#include + +FORCEINLINE Int32 PalInterlockedIncrement(_Inout_ _Interlocked_operand_ Int32 volatile *pDst) +{ + return __sync_add_and_fetch(pDst, 1); +} + +FORCEINLINE Int32 PalInterlockedDecrement(_Inout_ _Interlocked_operand_ Int32 volatile *pDst) +{ + return __sync_sub_and_fetch(pDst, 1); +} + +FORCEINLINE UInt32 PalInterlockedOr(_Inout_ _Interlocked_operand_ UInt32 volatile *pDst, UInt32 iValue) +{ + return __sync_or_and_fetch(pDst, iValue); +} + +FORCEINLINE UInt32 PalInterlockedAnd(_Inout_ _Interlocked_operand_ UInt32 volatile *pDst, UInt32 iValue) +{ + return __sync_and_and_fetch(pDst, iValue); +} + +FORCEINLINE Int32 PalInterlockedExchange(_Inout_ _Interlocked_operand_ Int32 volatile *pDst, Int32 iValue) +{ + return __sync_swap(pDst, iValue); +} + +FORCEINLINE Int64 PalInterlockedExchange64(_Inout_ _Interlocked_operand_ Int64 volatile *pDst, Int64 iValue) +{ + return __sync_swap(pDst, iValue); +} + +FORCEINLINE Int32 PalInterlockedCompareExchange(_Inout_ _Interlocked_operand_ Int32 volatile *pDst, Int32 iValue, Int32 iComparand) +{ + return __sync_val_compare_and_swap(pDst, iComparand, iValue); +} + +FORCEINLINE Int64 PalInterlockedCompareExchange64(_Inout_ _Interlocked_operand_ Int64 volatile *pDst, Int64 iValue, Int64 iComparand) +{ + return __sync_val_compare_and_swap(pDst, iComparand, iValue); +} + +#if defined(HOST_AMD64) || defined(HOST_ARM64) +FORCEINLINE UInt8 PalInterlockedCompareExchange128(_Inout_ _Interlocked_operand_ Int64 volatile *pDst, Int64 iValueHigh, Int64 iValueLow, Int64 *pComparandAndResult) +{ + __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (UInt64)pComparandAndResult[0]; + __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (UInt64)iValueLow); + pComparandAndResult[0] = (Int64)iResult; pComparandAndResult[1] = (Int64)(iResult >> 64); + return iComparand == iResult; +} +#endif // HOST_AMD64 + +#ifdef HOST_64BIT + +#define PalInterlockedExchangePointer(_pDst, _pValue) \ + ((void *)PalInterlockedExchange64((Int64 volatile *)(_pDst), (Int64)(size_t)(_pValue))) + +#define PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ + ((void *)PalInterlockedCompareExchange64((Int64 volatile *)(_pDst), (Int64)(size_t)(_pValue), (Int64)(size_t)(_pComparand))) + +#else // HOST_64BIT + +#define PalInterlockedExchangePointer(_pDst, _pValue) \ + ((void *)PalInterlockedExchange((Int32 volatile *)(_pDst), (Int32)(size_t)(_pValue))) + +#define 
PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ + ((void *)PalInterlockedCompareExchange((Int32 volatile *)(_pDst), (Int32)(size_t)(_pValue), (Int32)(size_t)(_pComparand))) + +#endif // HOST_64BIT + + +FORCEINLINE void PalYieldProcessor() +{ +#if defined(HOST_X86) || defined(HOST_AMD64) + __asm__ __volatile__( + "rep\n" + "nop" + ); +#endif +} + +FORCEINLINE void PalMemoryBarrier() +{ + __sync_synchronize(); +} + +#define PalDebugBreak() abort() + +FORCEINLINE Int32 PalGetLastError() +{ + return errno; +} + +FORCEINLINE void PalSetLastError(Int32 error) +{ + errno = error; +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkUnix.cpp new file mode 100644 index 0000000000000..16c5fe81deda0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/PalRedhawkUnix.cpp @@ -0,0 +1,1344 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Implementation of the Redhawk Platform Abstraction Layer (PAL) library when Unix is the platform. +// + +#include +#include +#include +#include +#include "config.h" +#include "UnixHandle.h" +#include +#include "gcenv.h" +#include "holder.h" +#include "HardwareExceptions.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_PTHREAD_GETTHREADID_NP +#include +#endif + +#if HAVE_LWP_SELF +#include +#endif + +#if HAVE_SYSCONF +// already included above +#elif HAVE_SYSCTL +#include +#else +#error Either sysctl or sysconf is required for GetSystemInfo. +#endif + +#if HAVE_SYS_VMPARAM_H +#include +#endif // HAVE_SYS_VMPARAM_H + +#if HAVE_MACH_VM_TYPES_H +#include +#endif // HAVE_MACH_VM_TYPES_H + +#if HAVE_MACH_VM_PARAM_H +#include +#endif // HAVE_MACH_VM_PARAM_H + +#ifdef __APPLE__ +#include +#include +#include +#include +#include +#endif // __APPLE__ + +#if HAVE_MACH_ABSOLUTE_TIME +#include +static mach_timebase_info_data_t s_TimebaseInfo; +#endif + +using std::nullptr_t; + +#ifndef __APPLE__ +#if HAVE_SYSCONF && HAVE__SC_AVPHYS_PAGES +#define SYSCONF_PAGES _SC_AVPHYS_PAGES +#elif HAVE_SYSCONF && HAVE__SC_PHYS_PAGES +#define SYSCONF_PAGES _SC_PHYS_PAGES +#else +#error Dont know how to get page-size on this architecture! 
+#endif +#endif // __APPLE__ + +#if defined(HOST_ARM) || defined(HOST_ARM64) +#define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_CONF +#define SYSCONF_GET_NUMPROCS_NAME "_SC_NPROCESSORS_CONF" +#else +#define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_ONLN +#define SYSCONF_GET_NUMPROCS_NAME "_SC_NPROCESSORS_ONLN" +#endif + +#define PalRaiseFailFastException RaiseFailFastException + +#define FATAL_ASSERT(e, msg) \ + do \ + { \ + if (!(e)) \ + { \ + fprintf(stderr, "FATAL ERROR: " msg); \ + RhFailFast(); \ + } \ + } \ + while(0) + +#define INVALID_HANDLE_VALUE ((HANDLE)(IntNative)-1) + +#define PAGE_NOACCESS 0x01 +#define PAGE_READWRITE 0x04 +#define PAGE_EXECUTE_READ 0x20 +#define PAGE_EXECUTE_READWRITE 0x40 +#define MEM_COMMIT 0x1000 +#define MEM_RESERVE 0x2000 +#define MEM_DECOMMIT 0x4000 +#define MEM_RELEASE 0x8000 + +#define WAIT_OBJECT_0 0 +#define WAIT_TIMEOUT 258 +#define WAIT_FAILED 0xFFFFFFFF + +static const int tccSecondsToMilliSeconds = 1000; +static const int tccSecondsToMicroSeconds = 1000000; +static const int tccSecondsToNanoSeconds = 1000000000; +static const int tccMilliSecondsToMicroSeconds = 1000; +static const int tccMilliSecondsToNanoSeconds = 1000000; +static const int tccMicroSecondsToNanoSeconds = 1000; + +static uint32_t g_dwPALCapabilities; +static UInt32 g_cNumProcs = 0; + +// HACK: the gcenv.h declares OS_PAGE_SIZE as a call instead of a constant, but we need a constant +#undef OS_PAGE_SIZE +#define OS_PAGE_SIZE 0x1000 + +// Helper memory page used by the FlushProcessWriteBuffers +static uint8_t g_helperPage[OS_PAGE_SIZE] __attribute__((aligned(OS_PAGE_SIZE))); + +// Mutex to make the FlushProcessWriteBuffersMutex thread safe +pthread_mutex_t g_flushProcessWriteBuffersMutex; + +bool QueryLogicalProcessorCount(); +bool InitializeFlushProcessWriteBuffers(); + +extern "C" void RaiseFailFastException(PEXCEPTION_RECORD arg1, PCONTEXT arg2, UInt32 arg3) +{ + // Abort aborts the process and causes creation of a crash dump + abort(); +} + +static void TimeSpecAdd(timespec* time, uint32_t milliseconds) +{ + uint64_t nsec = time->tv_nsec + (uint64_t)milliseconds * tccMilliSecondsToNanoSeconds; + if (nsec >= tccSecondsToNanoSeconds) + { + time->tv_sec += nsec / tccSecondsToNanoSeconds; + nsec %= tccSecondsToNanoSeconds; + } + + time->tv_nsec = nsec; +} + +// Convert nanoseconds to the timespec structure +// Parameters: +// nanoseconds - time in nanoseconds to convert +// t - the target timespec structure +static void NanosecondsToTimeSpec(uint64_t nanoseconds, timespec* t) +{ + t->tv_sec = nanoseconds / tccSecondsToNanoSeconds; + t->tv_nsec = nanoseconds % tccSecondsToNanoSeconds; +} + +void ReleaseCondAttr(pthread_condattr_t* condAttr) +{ + int st = pthread_condattr_destroy(condAttr); + ASSERT_MSG(st == 0, "Failed to destroy pthread_condattr_t object"); +} + +class PthreadCondAttrHolder : public Wrapper +{ +public: + PthreadCondAttrHolder(pthread_condattr_t* attrs) + : Wrapper(attrs) + { + } +}; + +class UnixEvent +{ + pthread_cond_t m_condition; + pthread_mutex_t m_mutex; + bool m_manualReset; + bool m_state; + bool m_isValid; + +public: + + UnixEvent(bool manualReset, bool initialState) + : m_manualReset(manualReset), + m_state(initialState), + m_isValid(false) + { + } + + bool Initialize() + { + pthread_condattr_t attrs; + int st = pthread_condattr_init(&attrs); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent condition attribute"); + return false; + } + + PthreadCondAttrHolder attrsHolder(&attrs); + +#if HAVE_PTHREAD_CONDATTR_SETCLOCK && 
!HAVE_MACH_ABSOLUTE_TIME + // Ensure that the pthread_cond_timedwait will use CLOCK_MONOTONIC + st = pthread_condattr_setclock(&attrs, CLOCK_MONOTONIC); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to set UnixEvent condition variable wait clock"); + return false; + } +#endif // HAVE_PTHREAD_CONDATTR_SETCLOCK && !HAVE_MACH_ABSOLUTE_TIME + + st = pthread_mutex_init(&m_mutex, NULL); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent mutex"); + return false; + } + + st = pthread_cond_init(&m_condition, &attrs); + if (st != 0) + { + ASSERT_UNCONDITIONALLY("Failed to initialize UnixEvent condition variable"); + + st = pthread_mutex_destroy(&m_mutex); + ASSERT_MSG(st == 0, "Failed to destroy UnixEvent mutex"); + return false; + } + + m_isValid = true; + + return true; + } + + bool Destroy() + { + bool success = true; + + if (m_isValid) + { + int st = pthread_mutex_destroy(&m_mutex); + ASSERT_MSG(st == 0, "Failed to destroy UnixEvent mutex"); + success = success && (st == 0); + + st = pthread_cond_destroy(&m_condition); + ASSERT_MSG(st == 0, "Failed to destroy UnixEvent condition variable"); + success = success && (st == 0); + } + + return success; + } + + uint32_t Wait(uint32_t milliseconds) + { + timespec endTime; +#if HAVE_MACH_ABSOLUTE_TIME + uint64_t endMachTime; + if (milliseconds != INFINITE) + { + uint64_t nanoseconds = (uint64_t)milliseconds * tccMilliSecondsToNanoSeconds; + NanosecondsToTimeSpec(nanoseconds, &endTime); + endMachTime = mach_absolute_time() + nanoseconds * s_TimebaseInfo.denom / s_TimebaseInfo.numer; + } +#elif HAVE_PTHREAD_CONDATTR_SETCLOCK + if (milliseconds != INFINITE) + { + clock_gettime(CLOCK_MONOTONIC, &endTime); + TimeSpecAdd(&endTime, milliseconds); + } +#else +#error Don't know how to perform timed wait on this platform +#endif + + int st = 0; + + pthread_mutex_lock(&m_mutex); + while (!m_state) + { + if (milliseconds == INFINITE) + { + st = pthread_cond_wait(&m_condition, &m_mutex); + } + else + { +#if HAVE_MACH_ABSOLUTE_TIME + // Since OSX doesn't support CLOCK_MONOTONIC, we use relative variant of the + // timed wait and we need to handle spurious wakeups properly. + st = pthread_cond_timedwait_relative_np(&m_condition, &m_mutex, &endTime); + if ((st == 0) && !m_state) + { + uint64_t machTime = mach_absolute_time(); + if (machTime < endMachTime) + { + // The wake up was spurious, recalculate the relative endTime + uint64_t remainingNanoseconds = (endMachTime - machTime) * s_TimebaseInfo.numer / s_TimebaseInfo.denom; + NanosecondsToTimeSpec(remainingNanoseconds, &endTime); + } + else + { + // Although the timed wait didn't report a timeout, time calculated from the + // mach time shows we have already reached the end time. It can happen if + // the wait was spuriously woken up right before the timeout. 
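+ // Report the timeout ourselves so the caller observes the same WAIT_TIMEOUT
+ // result as it would on the CLOCK_MONOTONIC code path.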
+ st = ETIMEDOUT; + } + } +#else // HAVE_MACH_ABSOLUTE_TIME + st = pthread_cond_timedwait(&m_condition, &m_mutex, &endTime); +#endif // HAVE_MACH_ABSOLUTE_TIME + // Verify that if the wait timed out, the event was not set + ASSERT((st != ETIMEDOUT) || !m_state); + } + + if (st != 0) + { + // wait failed or timed out + break; + } + } + + if ((st == 0) && !m_manualReset) + { + // Clear the state for auto-reset events so that only one waiter gets released + m_state = false; + } + + pthread_mutex_unlock(&m_mutex); + + uint32_t waitStatus; + + if (st == 0) + { + waitStatus = WAIT_OBJECT_0; + } + else if (st == ETIMEDOUT) + { + waitStatus = WAIT_TIMEOUT; + } + else + { + waitStatus = WAIT_FAILED; + } + + return waitStatus; + } + + void Set() + { + pthread_mutex_lock(&m_mutex); + m_state = true; + pthread_mutex_unlock(&m_mutex); + + // Unblock all threads waiting for the condition variable + pthread_cond_broadcast(&m_condition); + } + + void Reset() + { + pthread_mutex_lock(&m_mutex); + m_state = false; + pthread_mutex_unlock(&m_mutex); + } +}; + +class EventUnixHandle : public UnixHandle +{ +public: + EventUnixHandle(UnixEvent event) + : UnixHandle(event) + { + } + + virtual bool Destroy() + { + return m_object.Destroy(); + } +}; + +typedef UnixHandle ThreadUnixHandle; + +#if !HAVE_THREAD_LOCAL +extern "C" int __cxa_thread_atexit(void (*)(void*), void*, void *); +extern "C" void *__dso_handle; +#endif + +// This functions configures behavior of the signals that are not +// related to hardware exception handling. +void ConfigureSignals() +{ + // The default action for SIGPIPE is process termination. + // Since SIGPIPE can be signaled when trying to write on a socket for which + // the connection has been dropped, we need to tell the system we want + // to ignore this signal. + // Instead of terminating the process, the system call which would had + // issued a SIGPIPE will, instead, report an error and set errno to EPIPE. + signal(SIGPIPE, SIG_IGN); +} + +// The Redhawk PAL must be initialized before any of its exports can be called. Returns true for a successful +// initialization and false on failure. +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalInit() +{ + g_dwPALCapabilities = 0; + + if (!QueryLogicalProcessorCount()) + return false; + +#if HAVE_MACH_ABSOLUTE_TIME + kern_return_t machRet; + if ((machRet = mach_timebase_info(&s_TimebaseInfo)) != KERN_SUCCESS) + { + return false; + } +#endif + + if (!InitializeFlushProcessWriteBuffers()) + { + return false; + } +#ifndef USE_PORTABLE_HELPERS + if (!InitializeHardwareExceptionHandling()) + { + return false; + } +#endif // !USE_PORTABLE_HELPERS + + ConfigureSignals(); + + return true; +} + +// Given a mask of capabilities return true if all of them are supported by the current PAL. +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalHasCapability(PalCapability capability) +{ + return (g_dwPALCapabilities & (uint32_t)capability) == (uint32_t)capability; +} + +#if HAVE_THREAD_LOCAL + +struct TlsDestructionMonitor +{ + void* m_thread = nullptr; + + void SetThread(void* thread) + { + m_thread = thread; + } + + ~TlsDestructionMonitor() + { + if (m_thread != nullptr) + { + RuntimeThreadShutdown(m_thread); + } + } +}; + +// This thread local object is used to detect thread shutdown. Its destructor +// is called when a thread is being shut down. +thread_local TlsDestructionMonitor tls_destructionMonitor; + +#endif // HAVE_THREAD_LOCAL + +// This thread local variable is used for delegate marshalling +DECLSPEC_THREAD intptr_t tls_thunkData; + +// Attach thread to PAL. 
+// It can be called multiple times for the same thread. +// It fails fast if a different thread was already registered. +// Parameters: +// thread - thread to attach +extern "C" void PalAttachThread(void* thread) +{ +#if HAVE_THREAD_LOCAL + tls_destructionMonitor.SetThread(thread); +#else + __cxa_thread_atexit(RuntimeThreadShutdown, thread, &__dso_handle); +#endif +} + +// Detach thread from PAL. +// It fails fast if some other thread value was attached to PAL. +// Parameters: +// thread - thread to detach +// Return: +// true if the thread was detached, false if there was no attached thread +extern "C" bool PalDetachThread(void* thread) +{ + UNREFERENCED_PARAMETER(thread); + if (g_threadExitCallback != nullptr) + { + g_threadExitCallback(); + } + return true; +} + +#if !defined(USE_PORTABLE_HELPERS) && !defined(FEATURE_RX_THUNKS) +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, void** newThunksOut) +{ + PORTABILITY_ASSERT("UNIXTODO: Implement this function"); +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalFreeThunksFromTemplate(void *pBaseAddress) +{ + PORTABILITY_ASSERT("UNIXTODO: Implement this function"); +} +#endif // !USE_PORTABLE_HELPERS && !FEATURE_RX_THUNKS + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalMarkThunksAsValidCallTargets( + void *virtualAddress, + int thunkSize, + int thunksPerBlock, + int thunkBlockSize, + int thunkBlocksPerMapping) +{ + return UInt32_TRUE; +} + +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalSleep(uint32_t milliseconds) +{ +#if HAVE_CLOCK_NANOSLEEP + timespec endTime; + clock_gettime(CLOCK_MONOTONIC, &endTime); + TimeSpecAdd(&endTime, milliseconds); + while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &endTime, NULL) == EINTR) + { + } +#else // HAVE_CLOCK_NANOSLEEP + timespec requested; + requested.tv_sec = milliseconds / tccSecondsToMilliSeconds; + requested.tv_nsec = (milliseconds - requested.tv_sec * tccSecondsToMilliSeconds) * tccMilliSecondsToNanoSeconds; + + timespec remaining; + while (nanosleep(&requested, &remaining) == EINTR) + { + requested = remaining; + } +#endif // HAVE_CLOCK_NANOSLEEP +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI __stdcall PalSwitchToThread() +{ + // sched_yield yields to another thread in the current process. This implementation + // won't work well for cross-process synchronization. + return sched_yield() == 0; +} + +extern "C" UInt32_BOOL CloseHandle(HANDLE handle) +{ + if ((handle == NULL) || (handle == INVALID_HANDLE_VALUE)) + { + return UInt32_FALSE; + } + + UnixHandleBase* handleBase = (UnixHandleBase*)handle; + + bool success = handleBase->Destroy(); + + delete handleBase; + + return success ? 
UInt32_TRUE : UInt32_FALSE; +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ const wchar_t* pName) +{ + UnixEvent event = UnixEvent(manualReset, initialState); + if (!event.Initialize()) + { + return INVALID_HANDLE_VALUE; + } + + EventUnixHandle* handle = new (nothrow) EventUnixHandle(event); + + if (handle == NULL) + { + return INVALID_HANDLE_VALUE; + } + + return handle; +} + +typedef UInt32(__stdcall *BackgroundCallback)(_In_opt_ void* pCallbackContext); + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundWork(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext, UInt32_BOOL highPriority) +{ +#ifdef HOST_WASM + // No threads, so we can't start one + ASSERT(false); +#endif // HOST_WASM + pthread_attr_t attrs; + + int st = pthread_attr_init(&attrs); + ASSERT(st == 0); + + static const int NormalPriority = 0; + static const int HighestPriority = -20; + + // TODO: Figure out which scheduler to use, the default one doesn't seem to + // support per thread priorities. +#if 0 + sched_param params; + memset(¶ms, 0, sizeof(params)); + + params.sched_priority = highPriority ? HighestPriority : NormalPriority; + + // Set the priority of the thread + st = pthread_attr_setschedparam(&attrs, ¶ms); + ASSERT(st == 0); +#endif + // Create the thread as detached, that means not joinable + st = pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); + ASSERT(st == 0); + + pthread_t threadId; + st = pthread_create(&threadId, &attrs, (void *(*)(void*))callback, pCallbackContext); + + int st2 = pthread_attr_destroy(&attrs); + ASSERT(st2 == 0); + + return st == 0; +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ + return PalStartBackgroundWork(callback, pCallbackContext, UInt32_FALSE); +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ +#ifdef HOST_WASM + // WASMTODO: No threads so we can't start the finalizer thread + return true; +#else // HOST_WASM + return PalStartBackgroundWork(callback, pCallbackContext, UInt32_TRUE); +#endif // HOST_WASM +} + +// Returns a 64-bit tick count with a millisecond resolution. It tries its best +// to return monotonically increasing counts and avoid being affected by changes +// to the system clock (either due to drift or due to explicit changes to system +// time). 
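+// As a concrete sketch of the CLOCK_MONOTONIC arithmetic used below: a timespec of
+// { tv_sec = 3, tv_nsec = 250000000 } converts to 3 * 1000 + 250000000 / 1000000 = 3250 ms.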
+REDHAWK_PALEXPORT UInt64 REDHAWK_PALAPI PalGetTickCount64() +{ + UInt64 retval = 0; + +#if HAVE_MACH_ABSOLUTE_TIME + { + retval = (mach_absolute_time() * s_TimebaseInfo.numer / s_TimebaseInfo.denom) / tccMilliSecondsToNanoSeconds; + } +#elif HAVE_CLOCK_MONOTONIC + { + clockid_t clockType = +#if HAVE_CLOCK_MONOTONIC_COARSE + CLOCK_MONOTONIC_COARSE; // good enough resolution, fastest speed +#else + CLOCK_MONOTONIC; +#endif + struct timespec ts; + if (clock_gettime(clockType, &ts) == 0) + { + retval = (ts.tv_sec * tccSecondsToMilliSeconds) + (ts.tv_nsec / tccMilliSecondsToNanoSeconds); + } + else + { + ASSERT_UNCONDITIONALLY("clock_gettime(CLOCK_MONOTONIC) failed\n"); + } + } +#else + { + struct timeval tv; + if (gettimeofday(&tv, NULL) == 0) + { + retval = (tv.tv_sec * tccSecondsToMilliSeconds) + (tv.tv_usec / tccMilliSecondsToMicroSeconds); + } + else + { + ASSERT_UNCONDITIONALLY("gettimeofday() failed\n"); + } + } +#endif + + return retval; +} + +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalGetTickCount() +{ + return (UInt32)PalGetTickCount64(); +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer) +{ + HANDLE moduleHandle = NULL; + + // Emscripten's implementation of dladdr corrupts memory, + // but always returns 0 for the module handle, so just skip the call +#if !defined(HOST_WASM) + Dl_info info; + int st = dladdr(pointer, &info); + if (st != 0) + { + moduleHandle = info.dli_fbase; + } +#endif //!defined(HOST_WASM) + + return moduleHandle; +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvxEnabled() +{ + return true; +} + +REDHAWK_PALEXPORT void PalPrintFatalError(const char* message) +{ + // Write the message using lowest-level OS API available. This is used to print the stack overflow + // message, so there is not much that can be done here. + write(STDERR_FILENO, message, sizeof(message)); +} + +bool QueryLogicalProcessorCount() +{ +#if HAVE_SYSCONF + g_cNumProcs = sysconf(SYSCONF_GET_NUMPROCS); + if (g_cNumProcs < 1) + { + ASSERT_UNCONDITIONALLY("sysconf failed for " SYSCONF_GET_NUMPROCS_NAME "\n"); + return false; + } +#elif HAVE_SYSCTL + size_t sz = sizeof(g_cNumProcs); + + int st = 0; + if (sysctlbyname("hw.logicalcpu_max", &g_cNumProcs, &sz, NULL, 0) != 0) + { + ASSERT_UNCONDITIONALLY("sysctl failed for hw.logicalcpu_max\n"); + return false; + } + +#endif // HAVE_SYSCONF + + return true; +} + +static int W32toUnixAccessControl(uint32_t flProtect) +{ + int prot = 0; + + switch (flProtect & 0xff) + { + case PAGE_NOACCESS: + prot = PROT_NONE; + break; + case PAGE_READWRITE: + prot = PROT_READ | PROT_WRITE; + break; + case PAGE_EXECUTE_READ: + prot = PROT_READ | PROT_EXEC; + break; + case PAGE_EXECUTE_READWRITE: + prot = PROT_READ | PROT_WRITE | PROT_EXEC; + break; + default: + ASSERT(false); + break; + } + return prot; +} + +REDHAWK_PALEXPORT _Ret_maybenull_ _Post_writable_byte_size_(size) void* REDHAWK_PALAPI PalVirtualAlloc(_In_opt_ void* pAddress, size_t size, uint32_t allocationType, uint32_t protect) +{ + // TODO: thread safety! + + if ((allocationType & ~(MEM_RESERVE | MEM_COMMIT)) != 0) + { + // TODO: Implement + return NULL; + } + + ASSERT(((size_t)pAddress & (OS_PAGE_SIZE - 1)) == 0); + + // Align size to whole pages + size = (size + (OS_PAGE_SIZE - 1)) & ~(OS_PAGE_SIZE - 1); + int unixProtect = W32toUnixAccessControl(protect); + + if (allocationType & (MEM_RESERVE | MEM_COMMIT)) + { + // For Windows compatibility, let the PalVirtualAlloc reserve memory with 64k alignment. 
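+ // mmap only guarantees page alignment, so over-reserve by (64k - page size),
+ // then unmap the unaligned head and tail so that exactly size bytes remain,
+ // starting at the first 64k-aligned address inside the original mapping.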
+ static const size_t Alignment = 64 * 1024; + + size_t alignedSize = size + (Alignment - OS_PAGE_SIZE); + + void * pRetVal = mmap(pAddress, alignedSize, unixProtect, MAP_ANON | MAP_PRIVATE, -1, 0); + + if (pRetVal != NULL) + { + void * pAlignedRetVal = (void *)(((size_t)pRetVal + (Alignment - 1)) & ~(Alignment - 1)); + size_t startPadding = (size_t)pAlignedRetVal - (size_t)pRetVal; + if (startPadding != 0) + { + int ret = munmap(pRetVal, startPadding); + ASSERT(ret == 0); + } + + size_t endPadding = alignedSize - (startPadding + size); + if (endPadding != 0) + { + int ret = munmap((void *)((size_t)pAlignedRetVal + size), endPadding); + ASSERT(ret == 0); + } + + pRetVal = pAlignedRetVal; + } + + return pRetVal; + } + + if (allocationType & MEM_COMMIT) + { + int ret = mprotect(pAddress, size, unixProtect); + return (ret == 0) ? pAddress : NULL; + } + + return NULL; +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualFree(_In_ void* pAddress, size_t size, uint32_t freeType) +{ + ASSERT(((freeType & MEM_RELEASE) != MEM_RELEASE) || size == 0); + ASSERT((freeType & (MEM_RELEASE | MEM_DECOMMIT)) != (MEM_RELEASE | MEM_DECOMMIT)); + ASSERT(freeType != 0); + + // UNIXTODO: Implement this function + return UInt32_TRUE; +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddress, size_t size, uint32_t protect) +{ + int unixProtect = W32toUnixAccessControl(protect); + + return mprotect(pAddress, size, unixProtect) == 0; +} + +REDHAWK_PALEXPORT _Ret_maybenull_ void* REDHAWK_PALAPI PalSetWerDataBuffer(_In_ void* pNewBuffer) +{ + static void* pBuffer; + return _InterlockedExchangePointer(&pBuffer, pNewBuffer); +} + +extern "C" HANDLE GetCurrentProcess() +{ + return (HANDLE)-1; +} + +extern "C" uint32_t GetCurrentProcessId() +{ + return getpid(); +} + +extern "C" HANDLE GetCurrentThread() +{ + return (HANDLE)-2; +} + +extern "C" UInt32_BOOL DuplicateHandle( + HANDLE hSourceProcessHandle, + HANDLE hSourceHandle, + HANDLE hTargetProcessHandle, + HANDLE * lpTargetHandle, + UInt32 dwDesiredAccess, + UInt32_BOOL bInheritHandle, + UInt32 dwOptions) +{ + // We can only duplicate the current thread handle. That is all that the MRT uses. + ASSERT(hSourceProcessHandle == GetCurrentProcess()); + ASSERT(hTargetProcessHandle == GetCurrentProcess()); + ASSERT(hSourceHandle == GetCurrentThread()); + *lpTargetHandle = new (nothrow) ThreadUnixHandle(pthread_self()); + + return lpTargetHandle != nullptr; +} + +extern "C" UInt32_BOOL InitializeCriticalSection(CRITICAL_SECTION * lpCriticalSection) +{ + return pthread_mutex_init(&lpCriticalSection->mutex, NULL) == 0; +} + +extern "C" UInt32_BOOL InitializeCriticalSectionEx(CRITICAL_SECTION * lpCriticalSection, UInt32 arg2, UInt32 arg3) +{ + return InitializeCriticalSection(lpCriticalSection); +} + + +extern "C" void DeleteCriticalSection(CRITICAL_SECTION * lpCriticalSection) +{ + pthread_mutex_destroy(&lpCriticalSection->mutex); +} + +extern "C" void EnterCriticalSection(CRITICAL_SECTION * lpCriticalSection) +{ + pthread_mutex_lock(&lpCriticalSection->mutex);; +} + +extern "C" void LeaveCriticalSection(CRITICAL_SECTION * lpCriticalSection) +{ + pthread_mutex_unlock(&lpCriticalSection->mutex); +} + +extern "C" UInt32_BOOL IsDebuggerPresent() +{ +#ifdef HOST_WASM + // For now always true since the browser will handle it in case of WASM. 
+ return UInt32_TRUE; +#else + // UNIXTODO: Implement this function + return UInt32_FALSE; +#endif +} + +extern "C" void TerminateProcess(HANDLE arg1, UInt32 arg2) +{ + // TODO: change it to TerminateCurrentProcess + // Then if we modified the signature of the DuplicateHandle too, we can + // get rid of the PalGetCurrentProcess. + PORTABILITY_ASSERT("UNIXTODO: Implement this function"); +} + +extern "C" UInt32_BOOL SetEvent(HANDLE event) +{ + EventUnixHandle* unixHandle = (EventUnixHandle*)event; + unixHandle->GetObject()->Set(); + + return UInt32_TRUE; +} + +extern "C" UInt32_BOOL ResetEvent(HANDLE event) +{ + EventUnixHandle* unixHandle = (EventUnixHandle*)event; + unixHandle->GetObject()->Reset(); + + return UInt32_TRUE; +} + +extern "C" UInt32 GetEnvironmentVariableA(const char * name, char * buffer, UInt32 size) +{ + // Using std::getenv instead of getenv since it is guaranteed to be thread safe w.r.t. other + // std::getenv calls in C++11 + const char* value = std::getenv(name); + if (value == NULL) + { + return 0; + } + + size_t valueLen = strlen(value); + + if (valueLen < size) + { + strcpy(buffer, value); + return valueLen; + } + + // return required size including the null character or 0 if the size doesn't fit into UInt32 + return (valueLen < UINT32_MAX) ? (valueLen + 1) : 0; +} + +extern "C" UInt16 RtlCaptureStackBackTrace(UInt32 arg1, UInt32 arg2, void* arg3, UInt32* arg4) +{ + // UNIXTODO: Implement this function + return 0; +} + +typedef UInt32 (__stdcall *HijackCallback)(HANDLE hThread, _In_ PAL_LIMITED_CONTEXT* pThreadContext, _In_opt_ void* pCallbackContext); + +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_ HijackCallback callback, _In_opt_ void* pCallbackContext) +{ + // UNIXTODO: Implement PalHijack + return E_FAIL; +} + +extern "C" UInt32 WaitForSingleObjectEx(HANDLE handle, UInt32 milliseconds, UInt32_BOOL alertable) +{ + // The handle can only represent an event here + // TODO: encapsulate this stuff + UnixHandleBase* handleBase = (UnixHandleBase*)handle; + ASSERT(handleBase->GetType() == UnixHandleType::Event); + EventUnixHandle* unixHandle = (EventUnixHandle*)handleBase; + + return unixHandle->GetObject()->Wait(milliseconds); +} + +REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI PalCompatibleWaitAny(UInt32_BOOL alertable, uint32_t timeout, uint32_t handleCount, HANDLE* pHandles, UInt32_BOOL allowReentrantWait) +{ + // Only a single handle wait for event is supported + ASSERT(handleCount == 1); + + return WaitForSingleObjectEx(pHandles[0], timeout, alertable); +} + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if !__has_builtin(_mm_pause) +extern "C" void _mm_pause() +// Defined for implementing PalYieldProcessor in PalRedhawk.h +{ +#if defined(HOST_AMD64) || defined(HOST_X86) + __asm__ volatile ("pause"); +#endif +} +#endif + +extern "C" Int32 _stricmp(const char *string1, const char *string2) +{ + return strcasecmp(string1, string2); +} + +REDHAWK_PALEXPORT Int32 PalGetProcessCpuCount() +{ + return g_cNumProcs; +} + +//Reads the entire contents of the file into the specified buffer, buff +//returns the number of bytes read if the file is successfully read +//returns 0 if the file is not found, size is greater than maxBytesToRead or the file couldn't be opened or read +REDHAWK_PALEXPORT UInt32 PalReadFileContents(_In_z_ const TCHAR* fileName, _Out_writes_all_(maxBytesToRead) char* buff, _In_ UInt32 maxBytesToRead) +{ + int fd = open(fileName, O_RDONLY); + if (fd < 0) + { + return 0; + } + + + UInt32 bytesRead = 0; + struct stat 
fileStats; + if ((fstat(fd, &fileStats) == 0) && (fileStats.st_size <= maxBytesToRead)) + { + bytesRead = read(fd, buff, fileStats.st_size); + } + + close(fd); + + return bytesRead; +} + +__thread void* pStackHighOut = NULL; +__thread void* pStackLowOut = NULL; + +// Retrieves the entire range of memory dedicated to the calling thread's stack. This does +// not get the current dynamic bounds of the stack, which can be significantly smaller than +// the maximum bounds. +REDHAWK_PALEXPORT bool PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut) +{ + if (pStackHighOut == NULL) + { +#ifdef __APPLE__ + // This is a Mac specific method + pStackHighOut = pthread_get_stackaddr_np(pthread_self()); + pStackLowOut = ((uint8_t *)pStackHighOut - pthread_get_stacksize_np(pthread_self())); +#else // __APPLE__ + pthread_attr_t attr; + size_t stackSize; + int status; + + pthread_t thread = pthread_self(); + + status = pthread_attr_init(&attr); + ASSERT_MSG(status == 0, "pthread_attr_init call failed"); + +#if HAVE_PTHREAD_ATTR_GET_NP + status = pthread_attr_get_np(thread, &attr); +#elif HAVE_PTHREAD_GETATTR_NP + status = pthread_getattr_np(thread, &attr); +#else +#error Dont know how to get thread attributes on this platform! +#endif + ASSERT_MSG(status == 0, "pthread_getattr_np call failed"); + + status = pthread_attr_getstack(&attr, &pStackLowOut, &stackSize); + ASSERT_MSG(status == 0, "pthread_attr_getstack call failed"); + + status = pthread_attr_destroy(&attr); + ASSERT_MSG(status == 0, "pthread_attr_destroy call failed"); + + pStackHighOut = (uint8_t*)pStackLowOut + stackSize; +#endif // __APPLE__ + } + + *ppStackLowOut = pStackLowOut; + *ppStackHighOut = pStackHighOut; + + return true; +} + +// retrieves the full path to the specified module, if moduleBase is NULL retreieves the full path to the +// executable module of the current process. 
+// +// Return value: number of characters in name string +// +REDHAWK_PALEXPORT Int32 PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase) +{ +#if defined(HOST_WASM) + // Emscripten's implementation of dladdr corrupts memory and doesn't have the real name, so make up a name instead + const TCHAR* wasmModuleName = "WebAssemblyModule"; + *pModuleNameOut = wasmModuleName; + return strlen(wasmModuleName); +#else // HOST_WASM + Dl_info dl; + if (dladdr(moduleBase, &dl) == 0) + { + *pModuleNameOut = NULL; + return 0; + } + + *pModuleNameOut = dl.dli_fname; + return strlen(dl.dli_fname); +#endif // defined(HOST_WASM) +} + +GCSystemInfo g_RhSystemInfo; + +// Initialize the g_SystemInfo +bool InitializeSystemInfo() +{ + long pagesize = getpagesize(); + g_RhSystemInfo.dwPageSize = pagesize; + g_RhSystemInfo.dwAllocationGranularity = pagesize; + + int nrcpus = 0; + +#if HAVE_SYSCONF + nrcpus = sysconf(SYSCONF_GET_NUMPROCS); + if (nrcpus < 1) + { + ASSERT_UNCONDITIONALLY("sysconf failed for " SYSCONF_GET_NUMPROCS_NAME "\n"); + return false; + } +#elif HAVE_SYSCTL + int mib[2]; + + size_t sz = sizeof(nrcpus); + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + int rc = sysctl(mib, 2, &nrcpus, &sz, NULL, 0); + if (rc != 0) + { + ASSERT_UNCONDITIONALLY("sysctl failed for HW_NCPU\n"); + return false; + } +#endif // HAVE_SYSCONF + + g_RhSystemInfo.dwNumberOfProcessors = nrcpus; + + return true; +} + +// This function initializes data structures needed for the FlushProcessWriteBuffers +// Return: +// true if it succeeded, false otherwise +bool InitializeFlushProcessWriteBuffers() +{ + // Verify that the s_helperPage is really aligned to the g_SystemInfo.dwPageSize + ASSERT((((size_t)g_helperPage) & (OS_PAGE_SIZE - 1)) == 0); + + // Locking the page ensures that it stays in memory during the two mprotect + // calls in the FlushProcessWriteBuffers below. If the page was unmapped between + // those calls, they would not have the expected effect of generating IPI. + int status = mlock(g_helperPage, OS_PAGE_SIZE); + + if (status != 0) + { + return false; + } + + status = pthread_mutex_init(&g_flushProcessWriteBuffersMutex, NULL); + if (status != 0) + { + munlock(g_helperPage, OS_PAGE_SIZE); + } + + return status == 0; +} + +extern "C" void FlushProcessWriteBuffers() +{ + int status = pthread_mutex_lock(&g_flushProcessWriteBuffersMutex); + FATAL_ASSERT(status == 0, "Failed to lock the flushProcessWriteBuffersMutex lock"); + + // Changing a helper memory page protection from read / write to no access + // causes the OS to issue IPI to flush TLBs on all processors. This also + // results in flushing the processor buffers. + status = mprotect(g_helperPage, OS_PAGE_SIZE, PROT_READ | PROT_WRITE); + FATAL_ASSERT(status == 0, "Failed to change helper page protection to read / write"); + + // Ensure that the page is dirty before we change the protection so that + // we prevent the OS from skipping the global TLB flush. 
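+ // The atomic increment below both writes to the helper page (dirtying it) and
+ // acts as a full memory barrier on the flushing thread.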
+ __sync_add_and_fetch((size_t*)g_helperPage, 1); + + status = mprotect(g_helperPage, OS_PAGE_SIZE, PROT_NONE); + FATAL_ASSERT(status == 0, "Failed to change helper page protection to no access"); + + status = pthread_mutex_unlock(&g_flushProcessWriteBuffersMutex); + FATAL_ASSERT(status == 0, "Failed to unlock the flushProcessWriteBuffersMutex lock"); +} + +static const int64_t SECS_BETWEEN_1601_AND_1970_EPOCHS = 11644473600LL; +static const int64_t SECS_TO_100NS = 10000000; /* 10^7 */ + +extern "C" void GetSystemTimeAsFileTime(FILETIME *lpSystemTimeAsFileTime) +{ + struct timeval time = { 0 }; + gettimeofday(&time, NULL); + + int64_t result = ((int64_t)time.tv_sec + SECS_BETWEEN_1601_AND_1970_EPOCHS) * SECS_TO_100NS + + (time.tv_usec * 10); + + lpSystemTimeAsFileTime->dwLowDateTime = (uint32_t)result; + lpSystemTimeAsFileTime->dwHighDateTime = (uint32_t)(result >> 32); +} + +extern "C" UInt32_BOOL QueryPerformanceCounter(LARGE_INTEGER *lpPerformanceCount) +{ + // TODO: More efficient, platform-specific implementation + struct timeval tv; + if (gettimeofday(&tv, NULL) == -1) + { + ASSERT_UNCONDITIONALLY("gettimeofday() failed"); + return UInt32_FALSE; + } + lpPerformanceCount->QuadPart = + (int64_t) tv.tv_sec * (int64_t) tccSecondsToMicroSeconds + (int64_t) tv.tv_usec; + return UInt32_TRUE; +} + +extern "C" UInt32_BOOL QueryPerformanceFrequency(LARGE_INTEGER *lpFrequency) +{ + lpFrequency->QuadPart = (int64_t) tccSecondsToMicroSeconds; + return UInt32_TRUE; +} + +extern "C" UInt64 PalGetCurrentThreadIdForLogging() +{ +#if defined(__linux__) + return (uint64_t)syscall(SYS_gettid); +#elif defined(__APPLE__) + uint64_t tid; + pthread_threadid_np(pthread_self(), &tid); + return (uint64_t)tid; +#elif HAVE_PTHREAD_GETTHREADID_NP + return (uint64_t)pthread_getthreadid_np(); +#elif HAVE_LWP_SELF + return (uint64_t)_lwp_self(); +#else + // Fallback in case we don't know how to get integer thread id on the current platform + return (uint64_t)pthread_self(); +#endif +} + +#if defined(HOST_X86) || defined(HOST_AMD64) +REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI getcpuid(uint32_t arg, unsigned char result[16]) +{ + DWORD eax; +#if defined(HOST_X86) + __asm(" xor %%ecx, %%ecx\n" \ + " cpuid\n" \ + " mov %%eax, 0(%[result])\n" \ + " mov %%ebx, 4(%[result])\n" \ + " mov %%ecx, 8(%[result])\n" \ + " mov %%edx, 12(%[result])\n" \ + : "=a"(eax) /*output in eax*/\ + : "a"(arg), [result]"r"(result) /*inputs - arg in eax, result in any register*/\ + : "ebx", "ecx", "edx", "memory" /* registers that are clobbered, *result is clobbered */ + ); +#endif // defined(HOST_X86) +#if defined(HOST_AMD64) + __asm(" xor %%ecx, %%ecx\n" \ + " cpuid\n" \ + " mov %%eax, 0(%[result])\n" \ + " mov %%ebx, 4(%[result])\n" \ + " mov %%ecx, 8(%[result])\n" \ + " mov %%edx, 12(%[result])\n" \ + : "=a"(eax) /*output in eax*/\ + : "a"(arg), [result]"r"(result) /*inputs - arg in eax, result in any register*/\ + : "rbx", "ecx", "edx", "memory" /* registers that are clobbered, *result is clobbered */ + ); +#endif // defined(HOST_AMD64) + return eax; +} + +REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI getextcpuid(uint32_t arg1, uint32_t arg2, unsigned char result[16]) +{ + DWORD eax; +#if defined(HOST_X86) + DWORD ecx; + __asm(" cpuid\n" \ + " mov %%eax, 0(%[result])\n" \ + " mov %%ebx, 4(%[result])\n" \ + " mov %%ecx, 8(%[result])\n" \ + " mov %%edx, 12(%[result])\n" \ + : "=a"(eax), "=c"(ecx) /*output in eax, ecx is rewritten*/\ + : "c"(arg1), "a"(arg2), [result]"r"(result) /*inputs - arg1 in ecx, arg2 in eax, result in any register*/\ + : 
"ebx", "edx", "memory" /* registers that are clobbered, *result is clobbered */ + ); +#endif // defined(HOST_X86) +#if defined(HOST_AMD64) + __asm(" cpuid\n" \ + " mov %%eax, 0(%[result])\n" \ + " mov %%ebx, 4(%[result])\n" \ + " mov %%ecx, 8(%[result])\n" \ + " mov %%edx, 12(%[result])\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(arg1), "a"(arg2), [result]"r"(result) /*inputs - arg1 in ecx, arg2 in eax, result in any register*/\ + : "rbx", "edx", "memory" /* registers that are clobbered, *result is clobbered */ + ); +#endif // defined(HOST_AMD64) + return eax; +} + +REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport() +{ + DWORD eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + // check OS has enabled both XMM and YMM state support + return ((eax & 0x06) == 0x06) ? 1 : 0; +} +#endif // defined(HOST_X86) || defined(HOST_AMD64) diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.cpp b/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.cpp new file mode 100644 index 0000000000000..458214bbe56f7 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.cpp @@ -0,0 +1,636 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "regdisplay.h" +#include "config.h" + +#include + +#if HAVE_UCONTEXT_T +#include +#endif // HAVE_UCONTEXT_T + +#include "UnixContext.h" +#include "UnwindHelpers.h" + +// WebAssembly has a slightly different version of LibUnwind that doesn't define unw_get_save_loc +#if defined(HOST_WASM) +enum unw_save_loc_type_t +{ + UNW_SLT_NONE, /* register is not saved ("not an l-value") */ + UNW_SLT_MEMORY, /* register has been saved in memory */ + UNW_SLT_REG /* register has been saved in (another) register */ +}; +typedef enum unw_save_loc_type_t unw_save_loc_type_t; + +struct unw_save_loc_t +{ + unw_save_loc_type_t type; + union + { + unw_word_t addr; /* valid if type==UNW_SLT_MEMORY */ + unw_regnum_t regnum; /* valid if type==UNW_SLT_REG */ + } + u; +}; +typedef struct unw_save_loc_t unw_save_loc_t; + +int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*) +{ + return -1; +} +#endif // _WASM + +#ifdef __APPLE__ + +#define MCREG_Rip(mc) ((mc)->__ss.__rip) +#define MCREG_Rsp(mc) ((mc)->__ss.__rsp) +#define MCREG_Rax(mc) ((mc)->__ss.__rax) +#define MCREG_Rbx(mc) ((mc)->__ss.__rbx) +#define MCREG_Rcx(mc) ((mc)->__ss.__rcx) +#define MCREG_Rdx(mc) ((mc)->__ss.__rdx) +#define MCREG_Rsi(mc) ((mc)->__ss.__rsi) +#define MCREG_Rdi(mc) ((mc)->__ss.__rdi) +#define MCREG_Rbp(mc) ((mc)->__ss.__rbp) +#define MCREG_R8(mc) ((mc)->__ss.__r8) +#define MCREG_R9(mc) ((mc)->__ss.__r9) +#define MCREG_R10(mc) ((mc)->__ss.__r10) +#define MCREG_R11(mc) ((mc)->__ss.__r11) +#define MCREG_R12(mc) ((mc)->__ss.__r12) +#define MCREG_R13(mc) ((mc)->__ss.__r13) +#define MCREG_R14(mc) ((mc)->__ss.__r14) +#define MCREG_R15(mc) ((mc)->__ss.__r15) + +#else + +#if HAVE___GREGSET_T + +#ifdef HOST_64BIT +#define MCREG_Rip(mc) ((mc).__gregs[_REG_RIP]) +#define MCREG_Rsp(mc) ((mc).__gregs[_REG_RSP]) +#define MCREG_Rax(mc) ((mc).__gregs[_REG_RAX]) +#define MCREG_Rbx(mc) ((mc).__gregs[_REG_RBX]) +#define MCREG_Rcx(mc) ((mc).__gregs[_REG_RCX]) +#define MCREG_Rdx(mc) ((mc).__gregs[_REG_RDX]) +#define MCREG_Rsi(mc) ((mc).__gregs[_REG_RSI]) +#define MCREG_Rdi(mc) 
((mc).__gregs[_REG_RDI]) +#define MCREG_Rbp(mc) ((mc).__gregs[_REG_RBP]) +#define MCREG_R8(mc) ((mc).__gregs[_REG_R8]) +#define MCREG_R9(mc) ((mc).__gregs[_REG_R9]) +#define MCREG_R10(mc) ((mc).__gregs[_REG_R10]) +#define MCREG_R11(mc) ((mc).__gregs[_REG_R11]) +#define MCREG_R12(mc) ((mc).__gregs[_REG_R12]) +#define MCREG_R13(mc) ((mc).__gregs[_REG_R13]) +#define MCREG_R14(mc) ((mc).__gregs[_REG_R14]) +#define MCREG_R15(mc) ((mc).__gregs[_REG_R15]) + +#else // HOST_64BIT + +#define MCREG_Eip(mc) ((mc).__gregs[_REG_EIP]) +#define MCREG_Esp(mc) ((mc).__gregs[_REG_ESP]) +#define MCREG_Eax(mc) ((mc).__gregs[_REG_EAX]) +#define MCREG_Ebx(mc) ((mc).__gregs[_REG_EBX]) +#define MCREG_Ecx(mc) ((mc).__gregs[_REG_ECX]) +#define MCREG_Edx(mc) ((mc).__gregs[_REG_EDX]) +#define MCREG_Esi(mc) ((mc).__gregs[_REG_ESI]) +#define MCREG_Edi(mc) ((mc).__gregs[_REG_EDI]) +#define MCREG_Ebp(mc) ((mc).__gregs[_REG_EBP]) + +#endif // HOST_64BIT + +#elif HAVE_GREGSET_T + +#ifdef HOST_64BIT +#define MCREG_Rip(mc) ((mc).gregs[REG_RIP]) +#define MCREG_Rsp(mc) ((mc).gregs[REG_RSP]) +#define MCREG_Rax(mc) ((mc).gregs[REG_RAX]) +#define MCREG_Rbx(mc) ((mc).gregs[REG_RBX]) +#define MCREG_Rcx(mc) ((mc).gregs[REG_RCX]) +#define MCREG_Rdx(mc) ((mc).gregs[REG_RDX]) +#define MCREG_Rsi(mc) ((mc).gregs[REG_RSI]) +#define MCREG_Rdi(mc) ((mc).gregs[REG_RDI]) +#define MCREG_Rbp(mc) ((mc).gregs[REG_RBP]) +#define MCREG_R8(mc) ((mc).gregs[REG_R8]) +#define MCREG_R9(mc) ((mc).gregs[REG_R9]) +#define MCREG_R10(mc) ((mc).gregs[REG_R10]) +#define MCREG_R11(mc) ((mc).gregs[REG_R11]) +#define MCREG_R12(mc) ((mc).gregs[REG_R12]) +#define MCREG_R13(mc) ((mc).gregs[REG_R13]) +#define MCREG_R14(mc) ((mc).gregs[REG_R14]) +#define MCREG_R15(mc) ((mc).gregs[REG_R15]) + +#else // HOST_64BIT + +#define MCREG_Eip(mc) ((mc).gregs[REG_EIP]) +#define MCREG_Esp(mc) ((mc).gregs[REG_ESP]) +#define MCREG_Eax(mc) ((mc).gregs[REG_EAX]) +#define MCREG_Ebx(mc) ((mc).gregs[REG_EBX]) +#define MCREG_Ecx(mc) ((mc).gregs[REG_ECX]) +#define MCREG_Edx(mc) ((mc).gregs[REG_EDX]) +#define MCREG_Esi(mc) ((mc).gregs[REG_ESI]) +#define MCREG_Edi(mc) ((mc).gregs[REG_EDI]) +#define MCREG_Ebp(mc) ((mc).gregs[REG_EBP]) + +#endif // HOST_64BIT + +#else // HAVE_GREGSET_T + +#ifdef HOST_64BIT + +#if defined(HOST_ARM64) + +#define MCREG_Pc(mc) ((mc).pc) +#define MCREG_Sp(mc) ((mc).sp) +#define MCREG_Lr(mc) ((mc).regs[30]) +#define MCREG_X0(mc) ((mc).regs[0]) +#define MCREG_X1(mc) ((mc).regs[1]) +#define MCREG_X19(mc) ((mc).regs[19]) +#define MCREG_X20(mc) ((mc).regs[20]) +#define MCREG_X21(mc) ((mc).regs[21]) +#define MCREG_X22(mc) ((mc).regs[22]) +#define MCREG_X23(mc) ((mc).regs[23]) +#define MCREG_X24(mc) ((mc).regs[24]) +#define MCREG_X25(mc) ((mc).regs[25]) +#define MCREG_X26(mc) ((mc).regs[26]) +#define MCREG_X27(mc) ((mc).regs[27]) +#define MCREG_X28(mc) ((mc).regs[28]) +#define MCREG_Fp(mc) ((mc).regs[29]) + +#else + +// For FreeBSD, as found in x86/ucontext.h +#define MCREG_Rip(mc) ((mc).mc_rip) +#define MCREG_Rsp(mc) ((mc).mc_rsp) +#define MCREG_Rax(mc) ((mc).mc_rax) +#define MCREG_Rbx(mc) ((mc).mc_rbx) +#define MCREG_Rcx(mc) ((mc).mc_rcx) +#define MCREG_Rdx(mc) ((mc).mc_rdx) +#define MCREG_Rsi(mc) ((mc).mc_rsi) +#define MCREG_Rdi(mc) ((mc).mc_rdi) +#define MCREG_Rbp(mc) ((mc).mc_rbp) +#define MCREG_R8(mc) ((mc).mc_r8) +#define MCREG_R9(mc) ((mc).mc_r9) +#define MCREG_R10(mc) ((mc).mc_r10) +#define MCREG_R11(mc) ((mc).mc_r11) +#define MCREG_R12(mc) ((mc).mc_r12) +#define MCREG_R13(mc) ((mc).mc_r13) +#define MCREG_R14(mc) ((mc).mc_r14) +#define MCREG_R15(mc) ((mc).mc_r15) 
+ +#endif + +#else // HOST_64BIT + +#if defined(HOST_ARM) + +#define MCREG_Pc(mc) ((mc).arm_pc) +#define MCREG_Sp(mc) ((mc).arm_sp) +#define MCREG_Lr(mc) ((mc).arm_lr) +#define MCREG_R0(mc) ((mc).arm_r0) +#define MCREG_R1(mc) ((mc).arm_r1) +#define MCREG_R4(mc) ((mc).arm_r4) +#define MCREG_R5(mc) ((mc).arm_r5) +#define MCREG_R6(mc) ((mc).arm_r6) +#define MCREG_R7(mc) ((mc).arm_r7) +#define MCREG_R8(mc) ((mc).arm_r8) +#define MCREG_R9(mc) ((mc).arm_r9) +#define MCREG_R10(mc) ((mc).arm_r10) +#define MCREG_R11(mc) ((mc).arm_fp) + +#elif defined(HOST_X86) + +#define MCREG_Eip(mc) ((mc).mc_eip) +#define MCREG_Esp(mc) ((mc).mc_esp) +#define MCREG_Eax(mc) ((mc).mc_eax) +#define MCREG_Ebx(mc) ((mc).mc_ebx) +#define MCREG_Ecx(mc) ((mc).mc_ecx) +#define MCREG_Edx(mc) ((mc).mc_edx) +#define MCREG_Esi(mc) ((mc).mc_esi) +#define MCREG_Edi(mc) ((mc).mc_edi) +#define MCREG_Ebp(mc) ((mc).mc_ebp) + +#else +#error "Unsupported arch" +#endif + +#endif // HOST_64BIT + +#endif // HAVE_GREGSET_T + +#endif // __APPLE__ + +// Update unw_cursor_t from REGDISPLAY. +// NOTE: We don't set the IP here since the current use cases for this function +// don't require it. +static void RegDisplayToUnwindCursor(REGDISPLAY* regDisplay, unw_cursor_t *cursor) +{ +#define ASSIGN_REG(regName1, regName2) \ + unw_set_reg(cursor, regName1, regDisplay->regName2, 0); + +#define ASSIGN_REG_PTR(regName1, regName2) \ + if (regDisplay->p##regName2 != NULL) \ + unw_set_reg(cursor, regName1, *(regDisplay->p##regName2), 0); + +#if defined(HOST_AMD64) + ASSIGN_REG(UNW_REG_SP, SP) + ASSIGN_REG_PTR(UNW_X86_64_RBP, Rbp) + ASSIGN_REG_PTR(UNW_X86_64_RBX, Rbx) + ASSIGN_REG_PTR(UNW_X86_64_R12, R12) + ASSIGN_REG_PTR(UNW_X86_64_R13, R13) + ASSIGN_REG_PTR(UNW_X86_64_R14, R14) + ASSIGN_REG_PTR(UNW_X86_64_R15, R15) +#elif HOST_ARM + ASSIGN_REG(UNW_ARM_SP, SP) + ASSIGN_REG_PTR(UNW_ARM_R4, R4) + ASSIGN_REG_PTR(UNW_ARM_R5, R5) + ASSIGN_REG_PTR(UNW_ARM_R6, R6) + ASSIGN_REG_PTR(UNW_ARM_R7, R7) + ASSIGN_REG_PTR(UNW_ARM_R8, R8) + ASSIGN_REG_PTR(UNW_ARM_R9, R9) + ASSIGN_REG_PTR(UNW_ARM_R10, R10) + ASSIGN_REG_PTR(UNW_ARM_R11, R11) + ASSIGN_REG_PTR(UNW_ARM_R14, LR) +#elif HOST_ARM64 + ASSIGN_REG(UNW_ARM64_SP, SP) + ASSIGN_REG_PTR(UNW_ARM64_FP, FP) + ASSIGN_REG_PTR(UNW_ARM64_X19, X19) + ASSIGN_REG_PTR(UNW_ARM64_X20, X20) + ASSIGN_REG_PTR(UNW_ARM64_X21, X21) + ASSIGN_REG_PTR(UNW_ARM64_X22, X22) + ASSIGN_REG_PTR(UNW_ARM64_X23, X23) + ASSIGN_REG_PTR(UNW_ARM64_X24, X24) + ASSIGN_REG_PTR(UNW_ARM64_X25, X25) + ASSIGN_REG_PTR(UNW_ARM64_X26, X26) + ASSIGN_REG_PTR(UNW_ARM64_X27, X27) + ASSIGN_REG_PTR(UNW_ARM64_X28, X28) +#elif defined(HOST_X86) + ASSIGN_REG(UNW_REG_SP, SP) + ASSIGN_REG_PTR(UNW_X86_EBP, Rbp) + ASSIGN_REG_PTR(UNW_X86_EBX, Rbx) +#endif + +#undef ASSIGN_REG +#undef ASSIGN_REG_PTR +} + +// Returns the unw_proc_info_t for a given IP. +bool GetUnwindProcInfo(PCODE ip, unw_proc_info_t *procInfo) +{ + int st; + + unw_context_t unwContext; + unw_cursor_t cursor; + + st = unw_getcontext(&unwContext); + if (st < 0) + { + return false; + } + +#ifdef HOST_AMD64 + // We manually index into the unw_context_t's internals for now because there's + // no better way to modify it. This will go away in the future when we locate the + // LSDA and other information without initializing an unwind cursor. 
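+ // (Layout note: for x86_64, unw_getcontext saves the 16 general purpose registers
+ // followed by the instruction pointer, so data[16] is the IP slot being patched
+ // below; the ARM/ARM64 branches patch the analogous PC slot for their layouts.)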
+ unwContext.data[16] = ip; +#elif HOST_ARM + ((uint32_t*)(unwContext.data))[15] = ip; +#elif HOST_ARM64 + ((uint32_t*)(unwContext.data))[32] = ip; +#elif HOST_WASM + ASSERT(false); +#elif HOST_X86 + ASSERT(false); +#else + #error "GetUnwindProcInfo is not supported on this arch yet." +#endif + + st = unw_init_local(&cursor, &unwContext); + if (st < 0) + { + return false; + } + + st = unw_get_proc_info(&cursor, procInfo); + if (st < 0) + { + return false; + } + + return true; +} + +// Initialize unw_cursor_t and unw_context_t from REGDISPLAY +bool InitializeUnwindContextAndCursor(REGDISPLAY* regDisplay, unw_cursor_t* cursor, unw_context_t* unwContext) +{ + int st; + + st = unw_getcontext(unwContext); + if (st < 0) + { + return false; + } + + // Set the IP here instead of after unwinder initialization. unw_init_local + // will do some initialization of internal structures based on the IP value. + // We manually index into the unw_context_t's internals for now because there's + // no better way to modify it. This whole function will go away in the future + // when we are able to read unwind info without initializing an unwind cursor. +#ifdef HOST_AMD64 + unwContext->data[16] = regDisplay->IP; +#elif HOST_ARM + ((uint32_t*)(unwContext->data))[15] = regDisplay->IP; +#elif HOST_ARM64 + ((uint32_t*)(unwContext->data))[32] = regDisplay->IP; +#elif HOST_X86 + ASSERT(false); +#else + #error "InitializeUnwindContextAndCursor is not supported on this arch yet." +#endif + + st = unw_init_local(cursor, unwContext); + if (st < 0) + { + return false; + } + + // Set the unwind context to the specified Windows context. + RegDisplayToUnwindCursor(regDisplay, cursor); + + return true; +} + +// Update context pointer for a register from the unw_cursor_t. +static void GetContextPointer(unw_cursor_t *cursor, unw_context_t *unwContext, int reg, PTR_UIntNative *contextPointer) +{ + unw_save_loc_t saveLoc; + unw_get_save_loc(cursor, reg, &saveLoc); + if (saveLoc.type == UNW_SLT_MEMORY) + { + PTR_UIntNative pLoc = (PTR_UIntNative)saveLoc.u.addr; + // Filter out fake save locations that point to unwContext + if (unwContext == NULL || (pLoc < (PTR_UIntNative)unwContext) || ((PTR_UIntNative)(unwContext + 1) <= pLoc)) + *contextPointer = (PTR_UIntNative)saveLoc.u.addr; + } +} + +#if defined(HOST_AMD64) +#define GET_CONTEXT_POINTERS \ + GET_CONTEXT_POINTER(UNW_X86_64_RBP, Rbp) \ + GET_CONTEXT_POINTER(UNW_X86_64_RBX, Rbx) \ + GET_CONTEXT_POINTER(UNW_X86_64_R12, R12) \ + GET_CONTEXT_POINTER(UNW_X86_64_R13, R13) \ + GET_CONTEXT_POINTER(UNW_X86_64_R14, R14) \ + GET_CONTEXT_POINTER(UNW_X86_64_R15, R15) +#elif defined(HOST_ARM) +#define GET_CONTEXT_POINTERS \ + GET_CONTEXT_POINTER(UNW_ARM_R4, R4) \ + GET_CONTEXT_POINTER(UNW_ARM_R5, R5) \ + GET_CONTEXT_POINTER(UNW_ARM_R6, R6) \ + GET_CONTEXT_POINTER(UNW_ARM_R7, R7) \ + GET_CONTEXT_POINTER(UNW_ARM_R8, R8) \ + GET_CONTEXT_POINTER(UNW_ARM_R9, R9) \ + GET_CONTEXT_POINTER(UNW_ARM_R10, R10) \ + GET_CONTEXT_POINTER(UNW_ARM_R11, R11) +#elif defined(HOST_ARM64) +#define GET_CONTEXT_POINTERS \ + GET_CONTEXT_POINTER(UNW_ARM64_X19, X19) \ + GET_CONTEXT_POINTER(UNW_ARM64_X20, X20) \ + GET_CONTEXT_POINTER(UNW_ARM64_X21, X21) \ + GET_CONTEXT_POINTER(UNW_ARM64_X22, X22) \ + GET_CONTEXT_POINTER(UNW_ARM64_X23, X23) \ + GET_CONTEXT_POINTER(UNW_ARM64_X24, X24) \ + GET_CONTEXT_POINTER(UNW_ARM64_X25, X25) \ + GET_CONTEXT_POINTER(UNW_ARM64_X26, X26) \ + GET_CONTEXT_POINTER(UNW_ARM64_X27, X27) \ + GET_CONTEXT_POINTER(UNW_ARM64_X28, X28) \ + GET_CONTEXT_POINTER(UNW_ARM64_FP, FP) +#elif 
defined(HOST_X86) +#define GET_CONTEXT_POINTERS \ + GET_CONTEXT_POINTER(UNW_X86_EBP, Rbp) \ + GET_CONTEXT_POINTER(UNW_X86_EBX, Rbx) +#elif defined (HOST_WASM) +// No registers +#define GET_CONTEXT_POINTERS +#else +#error unsupported architecture +#endif + +// Update REGDISPLAY from the unw_cursor_t and unw_context_t +void UnwindCursorToRegDisplay(unw_cursor_t *cursor, unw_context_t *unwContext, REGDISPLAY *regDisplay) +{ +#define GET_CONTEXT_POINTER(unwReg, rdReg) GetContextPointer(cursor, unwContext, unwReg, ®Display->p##rdReg); + GET_CONTEXT_POINTERS +#undef GET_CONTEXT_POINTER + + unw_get_reg(cursor, UNW_REG_IP, (unw_word_t *) ®Display->IP); + unw_get_reg(cursor, UNW_REG_SP, (unw_word_t *) ®Display->SP); + +#if defined(HOST_AMD64) + regDisplay->pIP = PTR_PCODE(regDisplay->SP - sizeof(TADDR)); +#endif + +#if defined(HOST_ARM) || defined(HOST_ARM64) + regDisplay->IP |= 1; +#endif +} + +#if defined(HOST_AMD64) +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Rip, IP) \ + ASSIGN_REG(Rsp, Rsp) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(Rbx, Rbx) \ + ASSIGN_REG(Rbp, Rbp) \ + ASSIGN_REG(R12, R12) \ + ASSIGN_REG(R13, R13) \ + ASSIGN_REG(R14, R14) \ + ASSIGN_REG(R15, R15) + +#define ASSIGN_TWO_ARGUMENT_REGS(arg0Reg, arg1Reg) \ + MCREG_Rdi(nativeContext->uc_mcontext) = arg0Reg; \ + MCREG_Rsi(nativeContext->uc_mcontext) = arg1Reg; + +#elif defined(HOST_X86) +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Eip, IP) \ + ASSIGN_REG(Esp, Rsp) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(Ebx, Rbx) \ + ASSIGN_REG(Ebp, Rbp) + +#define ASSIGN_TWO_ARGUMENT_REGS(arg0Reg, arg1Reg) \ + MCREG_Ecx(nativeContext->uc_mcontext) = arg0Reg; \ + MCREG_Edx(nativeContext->uc_mcontext) = arg1Reg; + +#elif defined(HOST_ARM) + +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Pc, IP) \ + ASSIGN_REG(Sp, SP) \ + ASSIGN_REG(Lr, LR) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(R4, R4) \ + ASSIGN_REG(R5, R5) \ + ASSIGN_REG(R6, R6) \ + ASSIGN_REG(R7, R7) \ + ASSIGN_REG(R8, R8) \ + ASSIGN_REG(R9, R9) \ + ASSIGN_REG(R10, R10) \ + ASSIGN_REG(R11, R11) + +#define ASSIGN_TWO_ARGUMENT_REGS(arg0Reg, arg1Reg) \ + MCREG_R0(nativeContext->uc_mcontext) = arg0Reg; \ + MCREG_R1(nativeContext->uc_mcontext) = arg1Reg; + +#elif defined(HOST_ARM64) + +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Pc, IP) \ + ASSIGN_REG(Sp, SP) \ + ASSIGN_REG(Fp, FP) \ + ASSIGN_REG(Lr, LR) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(X19, X19) \ + ASSIGN_REG(X20, X20) \ + ASSIGN_REG(X21, X21) \ + ASSIGN_REG(X22, X22) \ + ASSIGN_REG(X23, X23) \ + ASSIGN_REG(X24, X24) \ + ASSIGN_REG(X25, X25) \ + ASSIGN_REG(X26, X26) \ + ASSIGN_REG(X27, X27) \ + ASSIGN_REG(X28, X28) + +#define ASSIGN_TWO_ARGUMENT_REGS \ + MCREG_X0(nativeContext->uc_mcontext) = arg0Reg; \ + MCREG_X1(nativeContext->uc_mcontext) = arg1Reg; + +#elif defined(HOST_WASM) + // TODO: determine how unwinding will work on WebAssembly +#define ASSIGN_CONTROL_REGS +#define ASSIGN_INTEGER_REGS +#define ASSIGN_TWO_ARGUMENT_REGS +#else +#error unsupported architecture +#endif + +// Convert Unix native context to PAL_LIMITED_CONTEXT +void NativeContextToPalContext(const void* context, PAL_LIMITED_CONTEXT* palContext) +{ + ucontext_t *nativeContext = (ucontext_t*)context; +#define ASSIGN_REG(regNative, regPal) palContext->regPal = MCREG_##regNative(nativeContext->uc_mcontext); + ASSIGN_CONTROL_REGS + ASSIGN_INTEGER_REGS +#undef ASSIGN_REG +} + +// Redirect Unix native context to the PAL_LIMITED_CONTEXT and also set the first two argument registers +void RedirectNativeContext(void* context, const PAL_LIMITED_CONTEXT* palContext, 
UIntNative arg0Reg, UIntNative arg1Reg) +{ + ucontext_t *nativeContext = (ucontext_t*)context; + +#define ASSIGN_REG(regNative, regPal) MCREG_##regNative(nativeContext->uc_mcontext) = palContext->regPal; + ASSIGN_CONTROL_REGS +#undef ASSIGN_REG + ASSIGN_TWO_ARGUMENT_REGS(arg0Reg, arg1Reg); +} + +#ifdef HOST_AMD64 +// Get value of a register from the native context +// Parameters: +// void* context - context containing the registers +// uint32_t index - index of the register +// Rax = 0, Rcx = 1, Rdx = 2, Rbx = 3 +// Rsp = 4, Rbp = 5, Rsi = 6, Rdi = 7 +// R8 = 8, R9 = 9, R10 = 10, R11 = 11 +// R12 = 12, R13 = 13, R14 = 14, R15 = 15 +uint64_t GetRegisterValueByIndex(void* context, uint32_t index) +{ + ucontext_t *nativeContext = (ucontext_t*)context; + switch (index) + { + case 0: + return MCREG_Rax(nativeContext->uc_mcontext); + case 1: + return MCREG_Rcx(nativeContext->uc_mcontext); + case 2: + return MCREG_Rdx(nativeContext->uc_mcontext); + case 3: + return MCREG_Rbx(nativeContext->uc_mcontext); + case 4: + return MCREG_Rsp(nativeContext->uc_mcontext); + case 5: + return MCREG_Rbp(nativeContext->uc_mcontext); + case 6: + return MCREG_Rsi(nativeContext->uc_mcontext); + case 7: + return MCREG_Rdi(nativeContext->uc_mcontext); + case 8: + return MCREG_R8(nativeContext->uc_mcontext); + case 9: + return MCREG_R9(nativeContext->uc_mcontext); + case 10: + return MCREG_R10(nativeContext->uc_mcontext); + case 11: + return MCREG_R11(nativeContext->uc_mcontext); + case 12: + return MCREG_R12(nativeContext->uc_mcontext); + case 13: + return MCREG_R13(nativeContext->uc_mcontext); + case 14: + return MCREG_R14(nativeContext->uc_mcontext); + case 15: + return MCREG_R15(nativeContext->uc_mcontext); + } + + ASSERT(false); + return 0; +} + +// Get value of the program counter from the native context +uint64_t GetPC(void* context) +{ + ucontext_t *nativeContext = (ucontext_t*)context; + return MCREG_Rip(nativeContext->uc_mcontext); +} + +#endif // HOST_AMD64 + +// Find LSDA and start address for a function at address controlPC +bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* lsda) +{ + unw_proc_info_t procInfo; + + if (!GetUnwindProcInfo((PCODE)controlPC, &procInfo)) + { + return false; + } + + assert((procInfo.start_ip <= controlPC) && (controlPC < procInfo.end_ip)); + +#if defined(HOST_ARM) || defined(HOST_ARM64) + // libunwind fills by reference not by value for ARM + *lsda = *((UIntNative *)procInfo.lsda); +#else + *lsda = procInfo.lsda; +#endif + *startAddress = procInfo.start_ip; + + return true; +} + +// Virtually unwind stack to the caller of the context specified by the REGDISPLAY +bool VirtualUnwind(REGDISPLAY* pRegisterSet) +{ + return UnwindHelpers::StepFrame(pRegisterSet); +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.h new file mode 100644 index 0000000000000..3218c49928d16 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnixContext.h @@ -0,0 +1,25 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef __UNIX_CONTEXT_H__ +#define __UNIX_CONTEXT_H__ + +// Convert Unix native context to PAL_LIMITED_CONTEXT +void NativeContextToPalContext(const void* context, PAL_LIMITED_CONTEXT* palContext); +// Redirect Unix native context to the PAL_LIMITED_CONTEXT and also set the first two argument registers +void RedirectNativeContext(void* context, const PAL_LIMITED_CONTEXT* palContext, UIntNative arg0Reg, UIntNative arg1Reg); + +// Find LSDA and start address for a function at address controlPC +bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* lsda); +// Virtually unwind stack to the caller of the context specified by the REGDISPLAY +bool VirtualUnwind(REGDISPLAY* pRegisterSet); + +#ifdef HOST_AMD64 +// Get value of a register from the native context. The index is the processor specific +// register index stored in machine instructions. +uint64_t GetRegisterValueByIndex(void* context, uint32_t index); +// Get value of the program counter from the native context +uint64_t GetPC(void* context); +#endif // HOST_AMD64 + +#endif // __UNIX_CONTEXT_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnixHandle.h b/src/coreclr/src/nativeaot/Runtime/unix/UnixHandle.h new file mode 100644 index 0000000000000..ac9712a145633 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnixHandle.h @@ -0,0 +1,59 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __UNIX_HANDLE_H__ +#define __UNIX_HANDLE_H__ + +enum class UnixHandleType +{ + Thread, + Event +}; + +// TODO: add validity check for usage / closing? +class UnixHandleBase +{ + UnixHandleType m_type; +protected: + UnixHandleBase(UnixHandleType type) + : m_type(type) + { + } + +public: + + virtual ~UnixHandleBase() + { + } + + virtual bool Destroy() + { + return true; + } + + UnixHandleType GetType() + { + return m_type; + } +}; + +template +class UnixHandle : UnixHandleBase +{ +protected: + T m_object; +public: + + UnixHandle(T object) + : UnixHandleBase(HT), + m_object(object) + { + } + + T* GetObject() + { + return &m_object; + } +}; + +#endif // __UNIX_HANDLE_H__ diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp new file mode 100644 index 0000000000000..e03a85e320588 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -0,0 +1,476 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+#include "common.h" + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "regdisplay.h" +#include "ICodeManager.h" +#include "UnixNativeCodeManager.h" +#include "varint.h" +#include "holder.h" + +#include "CommonMacros.inl" + +#define GCINFODECODER_NO_EE +#include "coreclr/gcinfodecoder.cpp" + +#include "UnixContext.h" + +#define UBF_FUNC_KIND_MASK 0x03 +#define UBF_FUNC_KIND_ROOT 0x00 +#define UBF_FUNC_KIND_HANDLER 0x01 +#define UBF_FUNC_KIND_FILTER 0x02 + +#define UBF_FUNC_HAS_EHINFO 0x04 +#define UBF_FUNC_REVERSE_PINVOKE 0x08 +#define UBF_FUNC_HAS_ASSOCIATED_DATA 0x10 + +struct UnixNativeMethodInfo +{ + PTR_VOID pMethodStartAddress; + PTR_UInt8 pMainLSDA; + PTR_UInt8 pLSDA; + bool executionAborted; +}; + +// Ensure that UnixNativeMethodInfo fits into the space reserved by MethodInfo +static_assert(sizeof(UnixNativeMethodInfo) <= sizeof(MethodInfo), "UnixNativeMethodInfo too big"); + +UnixNativeCodeManager::UnixNativeCodeManager(TADDR moduleBase, + PTR_VOID pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + PTR_PTR_VOID pClasslibFunctions, UInt32 nClasslibFunctions) + : m_moduleBase(moduleBase), + m_pvManagedCodeStartRange(pvManagedCodeStartRange), m_cbManagedCodeRange(cbManagedCodeRange), + m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions) +{ +} + +UnixNativeCodeManager::~UnixNativeCodeManager() +{ +} + +bool UnixNativeCodeManager::FindMethodInfo(PTR_VOID ControlPC, + MethodInfo * pMethodInfoOut) +{ + // Stackwalker may call this with ControlPC that does not belong to this code manager + if (dac_cast(ControlPC) < dac_cast(m_pvManagedCodeStartRange) || + dac_cast(m_pvManagedCodeStartRange) + m_cbManagedCodeRange <= dac_cast(ControlPC)) + { + return false; + } + + UnixNativeMethodInfo * pMethodInfo = (UnixNativeMethodInfo *)pMethodInfoOut; + UIntNative startAddress; + UIntNative lsda; + + if (!FindProcInfo((UIntNative)ControlPC, &startAddress, &lsda)) + { + return false; + } + + PTR_UInt8 p = dac_cast(lsda); + + pMethodInfo->pLSDA = p; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) + { + // Funclets just refer to the main function's blob + pMethodInfo->pMainLSDA = p + *dac_cast(p); + p += sizeof(int32_t); + + pMethodInfo->pMethodStartAddress = dac_cast(startAddress - *dac_cast(p)); + } + else + { + pMethodInfo->pMainLSDA = dac_cast(lsda); + pMethodInfo->pMethodStartAddress = dac_cast(startAddress); + } + + pMethodInfo->executionAborted = false; + + return true; +} + +bool UnixNativeCodeManager::IsFunclet(MethodInfo * pMethodInfo) +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + uint8_t unwindBlockFlags = *(pNativeMethodInfo->pLSDA); + return (unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT; +} + +bool UnixNativeCodeManager::IsFilter(MethodInfo * pMethodInfo) +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + uint8_t unwindBlockFlags = *(pNativeMethodInfo->pLSDA); + return (unwindBlockFlags & UBF_FUNC_KIND_MASK) == UBF_FUNC_KIND_FILTER; +} + +PTR_VOID UnixNativeCodeManager::GetFramePointer(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet) +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + // Return frame pointer for methods with EH and funclets + uint8_t unwindBlockFlags = *(pNativeMethodInfo->pLSDA); + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0 || (unwindBlockFlags & UBF_FUNC_KIND_MASK) != 
UBF_FUNC_KIND_ROOT) + { + return (PTR_VOID)pRegisterSet->GetFP(); + } + + return NULL; +} + +void UnixNativeCodeManager::EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback) +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + PTR_UInt8 p = pNativeMethodInfo->pMainLSDA; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + UInt32 codeOffset = (UInt32)(PINSTRToPCODE(dac_cast(safePointAddress)) - PINSTRToPCODE(dac_cast(pNativeMethodInfo->pMethodStartAddress))); + + GcInfoDecoder decoder( + GCInfoToken(p), + GcInfoDecoderFlags(DECODE_GC_LIFETIMES | DECODE_SECURITY_OBJECT | DECODE_VARARG), + codeOffset - 1 // TODO: Is this adjustment correct? + ); + + ICodeManagerFlags flags = (ICodeManagerFlags)0; + if (pNativeMethodInfo->executionAborted) + flags = ICodeManagerFlags::ExecutionAborted; + if (IsFilter(pMethodInfo)) + flags = (ICodeManagerFlags)(flags | ICodeManagerFlags::NoReportUntracked); + + if (!decoder.EnumerateLiveSlots( + pRegisterSet, + false /* reportScratchSlots */, + flags, + hCallback->pCallback, + hCallback + )) + { + assert(false); + } +} + +UIntNative UnixNativeCodeManager::GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet) +{ + // Return value + UIntNative upperBound; + + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + PTR_UInt8 p = pNativeMethodInfo->pMainLSDA; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + { + // Reverse PInvoke transition should be on the main function body only + assert(pNativeMethodInfo->pMainLSDA == pNativeMethodInfo->pLSDA); + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + GcInfoDecoder decoder(GCInfoToken(p), DECODE_REVERSE_PINVOKE_VAR); + INT32 slot = decoder.GetReversePInvokeFrameStackSlot(); + assert(slot != NO_REVERSE_PINVOKE_FRAME); + + TADDR basePointer = NULL; + UINT32 stackBasedRegister = decoder.GetStackBaseRegister(); + if (stackBasedRegister == NO_STACK_BASE_REGISTER) + { + basePointer = dac_cast(pRegisterSet->GetSP()); + } + else + { + basePointer = dac_cast(pRegisterSet->GetFP()); + } + + // Reverse PInvoke case. The embedded reverse PInvoke frame is guaranteed to reside above + // all outgoing arguments. + upperBound = (UIntNative)dac_cast(basePointer + slot); + } + else + { + // The passed in pRegisterSet should be left intact + REGDISPLAY localRegisterSet = *pRegisterSet; + + bool result = VirtualUnwind(&localRegisterSet); + assert(result); + + // All common ABIs have outgoing arguments under caller SP (minus slot reserved for return address). + // There are ABI-specific optimizations that could applied here, but they are not worth the complexity + // given that this path is used rarely. 
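+ // localRegisterSet now describes the caller, so its SP bounds this method's entire
+ // frame, including the outgoing argument area at its bottom, from above; one
+ // pointer-sized slot is subtracted for the return address that sits between the
+ // two frames on x86/amd64.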
+ upperBound = dac_cast(localRegisterSet.GetSP() - sizeof(TADDR)); + } + + return upperBound; +} + +bool UnixNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in/out + PTR_VOID * ppPreviousTransitionFrame) // out +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + PTR_UInt8 p = pNativeMethodInfo->pMainLSDA; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + { + // Reverse PInvoke transition should be on the main function body only + assert(pNativeMethodInfo->pMainLSDA == pNativeMethodInfo->pLSDA); + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + GcInfoDecoder decoder(GCInfoToken(p), DECODE_REVERSE_PINVOKE_VAR); + INT32 slot = decoder.GetReversePInvokeFrameStackSlot(); + assert(slot != NO_REVERSE_PINVOKE_FRAME); + + TADDR basePointer = NULL; + UINT32 stackBasedRegister = decoder.GetStackBaseRegister(); + if (stackBasedRegister == NO_STACK_BASE_REGISTER) + { + basePointer = dac_cast(pRegisterSet->GetSP()); + } + else + { + basePointer = dac_cast(pRegisterSet->GetFP()); + } + *ppPreviousTransitionFrame = *(void**)(basePointer + slot); + return true; + } + + *ppPreviousTransitionFrame = NULL; + + if (!VirtualUnwind(pRegisterSet)) + { + return false; + } + + return true; +} + +bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in + PTR_PTR_VOID * ppvRetAddrLocation, // out + GCRefKind * pRetValueKind) // out +{ + // @TODO: CORERT: GetReturnAddressHijackInfo + return false; +} + +void UnixNativeCodeManager::UnsynchronizedHijackMethodLoops(MethodInfo * pMethodInfo) +{ + // @TODO: CORERT: UnsynchronizedHijackMethodLoops +} + +PTR_VOID UnixNativeCodeManager::RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC) +{ + // GCInfo decoder needs to know whether execution of the method is aborted + // while querying for gc-info. But ICodeManager::EnumGCRef() doesn't receive any + // flags from mrt. Call to this method is used as a cue to mark the method info + // as execution aborted. Note - if pMethodInfo was cached, this scheme would not work. + // + // If the method has EH, then JIT will make sure the method is fully interruptible + // and we will have GC-info available at the faulting address as well. 
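+ // (EnumGcRefs above checks executionAborted and forwards it to the GC info decoder
+ // as ICodeManagerFlags::ExecutionAborted, which is why flipping the flag here is
+ // sufficient.)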
+ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + pNativeMethodInfo->executionAborted = true; + + return controlPC; +} + +struct UnixEHEnumState +{ + PTR_UInt8 pMethodStartAddress; + PTR_UInt8 pEHInfo; + UInt32 uClause; + UInt32 nClauses; +}; + +// Ensure that UnixEHEnumState fits into the space reserved by EHEnumState +static_assert(sizeof(UnixEHEnumState) <= sizeof(EHEnumState), "UnixEHEnumState too big"); + +bool UnixNativeCodeManager::EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumStateOut) +{ + assert(pMethodInfo != NULL); + assert(pMethodStartAddress != NULL); + assert(pEHEnumStateOut != NULL); + + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + + PTR_UInt8 p = pNativeMethodInfo->pMainLSDA; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + // return if there is no EH info associated with this method + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) == 0) + { + return false; + } + + UnixEHEnumState * pEnumState = (UnixEHEnumState *)pEHEnumStateOut; + + *pMethodStartAddress = pNativeMethodInfo->pMethodStartAddress; + + pEnumState->pMethodStartAddress = dac_cast(pNativeMethodInfo->pMethodStartAddress); + pEnumState->pEHInfo = dac_cast(p + *dac_cast(p)); + pEnumState->uClause = 0; + pEnumState->nClauses = VarInt::ReadUnsigned(pEnumState->pEHInfo); + + return true; +} + +bool UnixNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pEHClauseOut) +{ + assert(pEHEnumState != NULL); + assert(pEHClauseOut != NULL); + + UnixEHEnumState * pEnumState = (UnixEHEnumState *)pEHEnumState; + if (pEnumState->uClause >= pEnumState->nClauses) + { + return false; + } + + pEnumState->uClause++; + + pEHClauseOut->m_tryStartOffset = VarInt::ReadUnsigned(pEnumState->pEHInfo); + + UInt32 tryEndDeltaAndClauseKind = VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_clauseKind = (EHClauseKind)(tryEndDeltaAndClauseKind & 0x3); + pEHClauseOut->m_tryEndOffset = pEHClauseOut->m_tryStartOffset + (tryEndDeltaAndClauseKind >> 2); + + // For each clause, we have up to 4 integers: + // 1) try start offset + // 2) (try length << 2) | clauseKind + // 3) if (typed || fault || filter) { handler start offset } + // 4a) if (typed) { type RVA } + // 4b) if (filter) { filter start offset } + // + // The first two integers have already been decoded + + switch (pEHClauseOut->m_clauseKind) + { + case EH_CLAUSE_TYPED: + pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + + // Read target type + { + // @TODO: CORERT: Compress EHInfo using type table index scheme + // https://github.com/dotnet/corert/issues/972 + Int32 typeRelAddr = *((PTR_Int32&)pEnumState->pEHInfo)++; + pEHClauseOut->m_pTargetType = dac_cast(pEnumState->pEHInfo + typeRelAddr); + } + break; + case EH_CLAUSE_FAULT: + pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + break; + case EH_CLAUSE_FILTER: + pEHClauseOut->m_handlerAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_filterAddress = dac_cast(PINSTRToPCODE(dac_cast(pEnumState->pMethodStartAddress))) + VarInt::ReadUnsigned(pEnumState->pEHInfo); + break; + default: + UNREACHABLE_MSG("unexpected EHClauseKind"); + } + + 
return true; +} + +PTR_VOID UnixNativeCodeManager::GetOsModuleHandle() +{ + return (PTR_VOID)m_moduleBase; +} + +PTR_VOID UnixNativeCodeManager::GetMethodStartAddress(MethodInfo * pMethodInfo) +{ + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + return pNativeMethodInfo->pMethodStartAddress; +} + +void * UnixNativeCodeManager::GetClasslibFunction(ClasslibFunctionId functionId) +{ + uint32_t id = (uint32_t)functionId; + + if (id >= m_nClasslibFunctions) + { + return nullptr; + } + + return m_pClasslibFunctions[id]; +} + +PTR_VOID UnixNativeCodeManager::GetAssociatedData(PTR_VOID ControlPC) +{ + UnixNativeMethodInfo methodInfo; + if (!FindMethodInfo(ControlPC, (MethodInfo*)&methodInfo)) + return NULL; + + PTR_UInt8 p = methodInfo.pMainLSDA; + + uint8_t unwindBlockFlags = *p++; + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) == 0) + return NULL; + + return dac_cast(p + *dac_cast(p)); +} + +extern "C" bool __stdcall RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, UInt32 cbRange); +extern "C" void __stdcall UnregisterCodeManager(ICodeManager * pCodeManager); +extern "C" bool __stdcall RegisterUnboxingStubs(PTR_VOID pvStartRange, UInt32 cbRange); + +extern "C" +bool RhRegisterOSModule(void * pModule, + void * pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + void * pvUnboxingStubsStartRange, UInt32 cbUnboxingStubsRange, + void ** pClasslibFunctions, UInt32 nClasslibFunctions) +{ + NewHolder pUnixNativeCodeManager = new (nothrow) UnixNativeCodeManager((TADDR)pModule, + pvManagedCodeStartRange, cbManagedCodeRange, + pClasslibFunctions, nClasslibFunctions); + + if (pUnixNativeCodeManager == nullptr) + return false; + + if (!RegisterCodeManager(pUnixNativeCodeManager, pvManagedCodeStartRange, cbManagedCodeRange)) + return false; + + if (!RegisterUnboxingStubs(pvUnboxingStubsStartRange, cbUnboxingStubsRange)) + { + UnregisterCodeManager(pUnixNativeCodeManager); + return false; + } + + pUnixNativeCodeManager.SuppressRelease(); + + return true; +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.h b/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.h new file mode 100644 index 0000000000000..45ebeac60a243 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnixNativeCodeManager.h @@ -0,0 +1,69 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#pragma once + +class UnixNativeCodeManager : public ICodeManager +{ + TADDR m_moduleBase; + + PTR_VOID m_pvManagedCodeStartRange; + UInt32 m_cbManagedCodeRange; + + PTR_PTR_VOID m_pClasslibFunctions; + UInt32 m_nClasslibFunctions; + +public: + UnixNativeCodeManager(TADDR moduleBase, + PTR_VOID pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + PTR_PTR_VOID pClasslibFunctions, UInt32 nClasslibFunctions); + + virtual ~UnixNativeCodeManager(); + + // + // Code manager methods + // + + bool FindMethodInfo(PTR_VOID ControlPC, + MethodInfo * pMethodInfoOut); + + bool IsFunclet(MethodInfo * pMethodInfo); + + bool IsFilter(MethodInfo * pMethodInfo); + + PTR_VOID GetFramePointer(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet); + + void EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback); + + bool UnwindStackFrame(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in/out + PTR_VOID * ppPreviousTransitionFrame); // out + + UIntNative GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet); + + bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in + PTR_PTR_VOID * ppvRetAddrLocation, // out + GCRefKind * pRetValueKind); // out + + void UnsynchronizedHijackMethodLoops(MethodInfo * pMethodInfo); + + PTR_VOID RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC); + + bool EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumState); + + bool EHEnumNext(EHEnumState * pEHEnumState, EHClause * pEHClause); + + PTR_VOID GetMethodStartAddress(MethodInfo * pMethodInfo); + + void * GetClasslibFunction(ClasslibFunctionId functionId); + + PTR_VOID GetAssociatedData(PTR_VOID ControlPC); + + PTR_VOID GetOsModuleHandle(); +}; diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.cpp new file mode 100644 index 0000000000000..ced22cc272cd6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -0,0 +1,773 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "daccess.h" +#include "rhassert.h" + +#define UNW_STEP_SUCCESS 1 +#define UNW_STEP_END 0 + +#ifdef __APPLE__ +#include +#endif + +#include +#include "UnwindHelpers.h" + +// libunwind headers +#include +#include +#include +#include +#if defined(TARGET_ARM) +#include +#endif +#include + + +#if defined(TARGET_AMD64) +using libunwind::Registers_x86_64; +#elif defined(TARGET_ARM) +using libunwind::Registers_arm; +#elif defined(TARGET_ARM64) +using libunwind::Registers_arm64; +#elif defined(TARGET_X86) +using libunwind::Registers_x86; +#else +#error "Unwinding is not implemented for this architecture yet." 
+#endif +using libunwind::LocalAddressSpace; +using libunwind::EHHeaderParser; +#if _LIBUNWIND_SUPPORT_DWARF_UNWIND +using libunwind::DwarfInstructions; +#endif +using libunwind::UnwindInfoSections; + +LocalAddressSpace _addressSpace; + +#ifdef TARGET_AMD64 + +// Shim that implements methods required by libunwind over REGDISPLAY +struct Registers_REGDISPLAY : REGDISPLAY +{ + static int getArch() { return libunwind::REGISTERS_X86_64; } + + inline uint64_t getRegister(int regNum) const + { + switch (regNum) + { + case UNW_REG_IP: + return IP; + case UNW_REG_SP: + return SP; + case UNW_X86_64_RAX: + return *pRax; + case UNW_X86_64_RDX: + return *pRdx; + case UNW_X86_64_RCX: + return *pRcx; + case UNW_X86_64_RBX: + return *pRbx; + case UNW_X86_64_RSI: + return *pRsi; + case UNW_X86_64_RDI: + return *pRdi; + case UNW_X86_64_RBP: + return *pRbp; + case UNW_X86_64_RSP: + return SP; + case UNW_X86_64_R8: + return *pR8; + case UNW_X86_64_R9: + return *pR9; + case UNW_X86_64_R10: + return *pR10; + case UNW_X86_64_R11: + return *pR11; + case UNW_X86_64_R12: + return *pR12; + case UNW_X86_64_R13: + return *pR13; + case UNW_X86_64_R14: + return *pR14; + case UNW_X86_64_R15: + return *pR15; + } + + // Unsupported register requested + abort(); + } + + inline void setRegister(int regNum, uint64_t value, uint64_t location) + { + switch (regNum) + { + case UNW_REG_IP: + IP = value; + pIP = (PTR_PCODE)location; + return; + case UNW_REG_SP: + SP = value; + return; + case UNW_X86_64_RAX: + pRax = (PTR_UIntNative)location; + return; + case UNW_X86_64_RDX: + pRdx = (PTR_UIntNative)location; + return; + case UNW_X86_64_RCX: + pRcx = (PTR_UIntNative)location; + return; + case UNW_X86_64_RBX: + pRbx = (PTR_UIntNative)location; + return; + case UNW_X86_64_RSI: + pRsi = (PTR_UIntNative)location; + return; + case UNW_X86_64_RDI: + pRdi = (PTR_UIntNative)location; + return; + case UNW_X86_64_RBP: + pRbp = (PTR_UIntNative)location; + return; + case UNW_X86_64_RSP: + SP = value; + return; + case UNW_X86_64_R8: + pR8 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R9: + pR9 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R10: + pR10 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R11: + pR11 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R12: + pR12 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R13: + pR13 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R14: + pR14 = (PTR_UIntNative)location; + return; + case UNW_X86_64_R15: + pR15 = (PTR_UIntNative)location; + return; + } + + // Unsupported x86_64 register + abort(); + } + + // N/A for x86_64 + inline bool validFloatRegister(int) { return false; } + inline bool validVectorRegister(int) { return false; } + + inline static int lastDwarfRegNum() { return 16; } + + inline bool validRegister(int regNum) const + { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 15) + return false; + return true; + } + + // N/A for x86_64 + inline double getFloatRegister(int) const { abort(); } + inline void setFloatRegister(int, double) { abort(); } + inline double getVectorRegister(int) const { abort(); } + inline void setVectorRegister(int, ...) 
{ abort(); } + + uint64_t getSP() const { return SP; } + void setSP(uint64_t value, uint64_t location) { SP = value; } + + uint64_t getIP() const { return IP; } + + void setIP(uint64_t value, uint64_t location) + { + IP = value; + pIP = (PTR_PCODE)location; + } + + uint64_t getRBP() const { return *pRbp; } + void setRBP(uint64_t value, uint64_t location) { pRbp = (PTR_UIntNative)location; } + uint64_t getRBX() const { return *pRbx; } + void setRBX(uint64_t value, uint64_t location) { pRbx = (PTR_UIntNative)location; } + uint64_t getR12() const { return *pR12; } + void setR12(uint64_t value, uint64_t location) { pR12 = (PTR_UIntNative)location; } + uint64_t getR13() const { return *pR13; } + void setR13(uint64_t value, uint64_t location) { pR13 = (PTR_UIntNative)location; } + uint64_t getR14() const { return *pR14; } + void setR14(uint64_t value, uint64_t location) { pR14 = (PTR_UIntNative)location; } + uint64_t getR15() const { return *pR15; } + void setR15(uint64_t value, uint64_t location) { pR15 = (PTR_UIntNative)location; } +}; + +#endif // TARGET_AMD64 +#if defined(TARGET_X86) +struct Registers_REGDISPLAY : REGDISPLAY +{ + static int getArch() { return libunwind::REGISTERS_X86; } + + inline uint64_t getRegister(int regNum) const + { + switch (regNum) + { + case UNW_REG_IP: + return IP; + case UNW_REG_SP: + return SP; + case UNW_X86_EAX: + return *pRax; + case UNW_X86_EDX: + return *pRdx; + case UNW_X86_ECX: + return *pRcx; + case UNW_X86_EBX: + return *pRbx; + case UNW_X86_ESI: + return *pRsi; + case UNW_X86_EDI: + return *pRdi; + case UNW_X86_EBP: + return *pRbp; + case UNW_X86_ESP: + return SP; + } + + // Unsupported register requested + abort(); + } + + inline void setRegister(int regNum, uint64_t value, uint64_t location) + { + switch (regNum) + { + case UNW_REG_IP: + IP = value; + pIP = (PTR_PCODE)location; + return; + case UNW_REG_SP: + SP = value; + return; + case UNW_X86_EAX: + pRax = (PTR_UIntNative)location; + return; + case UNW_X86_EDX: + pRdx = (PTR_UIntNative)location; + return; + case UNW_X86_ECX: + pRcx = (PTR_UIntNative)location; + return; + case UNW_X86_EBX: + pRbx = (PTR_UIntNative)location; + return; + case UNW_X86_ESI: + pRsi = (PTR_UIntNative)location; + return; + case UNW_X86_EDI: + pRdi = (PTR_UIntNative)location; + return; + case UNW_X86_EBP: + pRbp = (PTR_UIntNative)location; + return; + case UNW_X86_ESP: + SP = value; + return; + } + + // Unsupported x86_64 register + abort(); + } + + // N/A for x86 + inline bool validFloatRegister(int) { return false; } + inline bool validVectorRegister(int) { return false; } + + inline static int lastDwarfRegNum() { return 16; } + + inline bool validRegister(int regNum) const + { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 15) + return false; + return true; + } + + // N/A for x86 + inline double getFloatRegister(int) const { abort(); } + inline void setFloatRegister(int, double) { abort(); } + inline double getVectorRegister(int) const { abort(); } + inline void setVectorRegister(int, ...) 
{ abort(); } + + void setSP(uint64_t value, uint64_t location) { SP = value; } + + uint64_t getIP() const { return IP; } + + void setIP(uint64_t value, uint64_t location) + { + IP = value; + pIP = (PTR_PCODE)location; + } + + uint64_t getEBP() const { return *pRbp; } + void setEBP(uint64_t value, uint64_t location) { pRbp = (PTR_UIntNative)location; } + uint64_t getEBX() const { return *pRbx; } + void setEBX(uint64_t value, uint64_t location) { pRbx = (PTR_UIntNative)location; } +}; + +#endif // TARGET_X86 +#if defined(TARGET_ARM) + +class Registers_arm_rt: public libunwind::Registers_arm { +public: + Registers_arm_rt() { abort(); }; + Registers_arm_rt(void *registers) { regs = (REGDISPLAY *)registers; }; + uint32_t getRegister(int num); + void setRegister(int num, uint32_t value, uint32_t location); + uint32_t getRegisterLocation(int regNum) const { abort();} + unw_fpreg_t getFloatRegister(int num) { abort();} + void setFloatRegister(int num, unw_fpreg_t value) {abort();} + bool validVectorRegister(int num) const { abort();} + uint32_t getVectorRegister(int num) const {abort();}; + void setVectorRegister(int num, uint32_t value) {abort();}; + void jumpto() { abort();}; + uint32_t getSP() const { return regs->SP;} + void setSP(uint32_t value, uint32_t location) { regs->SP = value;} + uint32_t getIP() const { return regs->IP;} + void setIP(uint32_t value, uint32_t location) + { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } + void saveVFPAsX() {abort();}; +private: + REGDISPLAY *regs; +}; + +inline uint32_t Registers_arm_rt::getRegister(int regNum) { + if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) + return regs->SP; + + if (regNum == UNW_ARM_LR) + return *regs->pLR; + + if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) + return regs->IP; + + switch (regNum) + { + case (UNW_ARM_R0): + return *regs->pR0; + case (UNW_ARM_R1): + return *regs->pR1; + case (UNW_ARM_R2): + return *regs->pR2; + case (UNW_ARM_R3): + return *regs->pR3; + case (UNW_ARM_R4): + return *regs->pR4; + case (UNW_ARM_R5): + return *regs->pR5; + case (UNW_ARM_R6): + return *regs->pR6; + case (UNW_ARM_R7): + return *regs->pR7; + case (UNW_ARM_R8): + return *regs->pR8; + case (UNW_ARM_R9): + return *regs->pR9; + case (UNW_ARM_R10): + return *regs->pR10; + case (UNW_ARM_R11): + return *regs->pR11; + case (UNW_ARM_R12): + return *regs->pR12; + } + + PORTABILITY_ASSERT("unsupported arm register"); +} + +void Registers_arm_rt::setRegister(int num, uint32_t value, uint32_t location) +{ + + if (num == UNW_REG_SP || num == UNW_ARM_SP) { + regs->SP = (UIntNative )value; + return; + } + + if (num == UNW_ARM_LR) { + regs->pLR = (PTR_UIntNative)location; + return; + } + + if (num == UNW_REG_IP || num == UNW_ARM_IP) { + regs->IP = value; + /* the location could be NULL, we could try to recover + pointer to value in stack from pLR */ + if ((!location) && (regs->pLR) && (*regs->pLR == value)) + regs->pIP = regs->pLR; + else + regs->pIP = (PTR_UIntNative)location; + return; + } + + switch (num) + { + case (UNW_ARM_R0): + regs->pR0 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R1): + regs->pR1 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R2): + regs->pR2 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R3): + regs->pR3 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R4): + regs->pR4 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R5): + regs->pR5 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R6): + regs->pR6 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R7): + regs->pR7 = 
(PTR_UIntNative)location; + break; + case (UNW_ARM_R8): + regs->pR8 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R9): + regs->pR9 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R10): + regs->pR10 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R11): + regs->pR11 = (PTR_UIntNative)location; + break; + case (UNW_ARM_R12): + regs->pR12 = (PTR_UIntNative)location; + break; + default: + PORTABILITY_ASSERT("unsupported arm register"); + } +} + +#endif // TARGET_ARM + +#if defined(TARGET_ARM64) + +class Registers_arm64_rt: public libunwind::Registers_arm64 { +public: + Registers_arm64_rt() { abort(); }; + Registers_arm64_rt(const void *registers); + + bool validRegister(int num) {abort();}; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + bool validFloatRegister(int num) {abort();}; + double getFloatRegister(int num) {abort();} + void setFloatRegister(int num, double value) {abort();} + bool validVectorRegister(int num) const {abort();} + libunwind::v128 getVectorRegister(int num) const {abort();}; + void setVectorRegister(int num, libunwind::v128 value) {abort();}; + void jumpto() { abort();}; + + uint64_t getSP() const { return regs->SP;} + void setSP(uint64_t value, uint64_t location) { regs->SP = value;} + uint64_t getIP() const { return regs->IP;} + void setIP(uint64_t value, uint64_t location) + { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } + void saveVFPAsX() {abort();}; +private: + REGDISPLAY *regs; +}; + +inline Registers_arm64_rt::Registers_arm64_rt(const void *registers) { + regs = (REGDISPLAY *)registers; +} + +inline uint64_t Registers_arm64_rt::getRegister(int regNum) const { + if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP) + return regs->SP; + + if (regNum == UNW_ARM64_LR) + return *regs->pLR; + + if (regNum == UNW_REG_IP) + return regs->IP; + + switch (regNum) + { + case (UNW_ARM64_X0): + return *regs->pX0; + case (UNW_ARM64_X1): + return *regs->pX1; + case (UNW_ARM64_X2): + return *regs->pX2; + case (UNW_ARM64_X3): + return *regs->pX3; + case (UNW_ARM64_X4): + return *regs->pX4; + case (UNW_ARM64_X5): + return *regs->pX5; + case (UNW_ARM64_X6): + return *regs->pX6; + case (UNW_ARM64_X7): + return *regs->pX7; + case (UNW_ARM64_X8): + return *regs->pX8; + case (UNW_ARM64_X9): + return *regs->pX9; + case (UNW_ARM64_X10): + return *regs->pX10; + case (UNW_ARM64_X11): + return *regs->pX11; + case (UNW_ARM64_X12): + return *regs->pX12; + case (UNW_ARM64_X13): + return *regs->pX13; + case (UNW_ARM64_X14): + return *regs->pX14; + case (UNW_ARM64_X15): + return *regs->pX15; + case (UNW_ARM64_X16): + return *regs->pX16; + case (UNW_ARM64_X17): + return *regs->pX17; + case (UNW_ARM64_X18): + return *regs->pX18; + case (UNW_ARM64_X19): + return *regs->pX19; + case (UNW_ARM64_X20): + return *regs->pX20; + case (UNW_ARM64_X21): + return *regs->pX21; + case (UNW_ARM64_X22): + return *regs->pX22; + case (UNW_ARM64_X23): + return *regs->pX23; + case (UNW_ARM64_X24): + return *regs->pX24; + case (UNW_ARM64_X25): + return *regs->pX25; + case (UNW_ARM64_X26): + return *regs->pX26; + case (UNW_ARM64_X27): + return *regs->pX27; + case (UNW_ARM64_X28): + return *regs->pX28; + } + + PORTABILITY_ASSERT("unsupported arm64 register"); +} + +void Registers_arm64_rt::setRegister(int num, uint64_t value, uint64_t location) +{ + + if (num == UNW_REG_SP || num == UNW_ARM64_SP) { + regs->SP = (UIntNative )value; + return; + } + + if (num == UNW_ARM64_LR) { + regs->pLR = (PTR_UIntNative)location; + return; + } + + if 
(num == UNW_REG_IP) { + regs->IP = value; + /* the location could be NULL, we could try to recover + pointer to value in stack from pLR */ + if ((!location) && (regs->pLR) && (*regs->pLR == value)) + regs->pIP = regs->pLR; + else + regs->pIP = (PTR_UIntNative)location; + return; + } + + switch (num) + { + case (UNW_ARM64_X0): + regs->pX0 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X1): + regs->pX1 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X2): + regs->pX2 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X3): + regs->pX3 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X4): + regs->pX4 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X5): + regs->pX5 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X6): + regs->pX6 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X7): + regs->pX7 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X8): + regs->pX8 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X9): + regs->pX9 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X10): + regs->pX10 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X11): + regs->pX11 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X12): + regs->pX12 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X13): + regs->pX13 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X14): + regs->pX14 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X15): + regs->pX15 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X16): + regs->pX16 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X17): + regs->pX17 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X18): + regs->pX18 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X19): + regs->pX19 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X20): + regs->pX20 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X21): + regs->pX21 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X22): + regs->pX22 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X23): + regs->pX23 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X24): + regs->pX24 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X25): + regs->pX25 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X26): + regs->pX26 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X27): + regs->pX27 = (PTR_UIntNative)location; + break; + case (UNW_ARM64_X28): + regs->pX28 = (PTR_UIntNative)location; + break; + default: + PORTABILITY_ASSERT("unsupported arm64 register"); + } +} + +#endif // TARGET_ARM64 + +bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs) +{ +#if defined(TARGET_AMD64) + libunwind::UnwindCursor uc(_addressSpace); +#elif defined(TARGET_ARM) + libunwind::UnwindCursor uc(_addressSpace, regs); +#elif defined(TARGET_ARM64) + libunwind::UnwindCursor uc(_addressSpace, regs); +#elif defined(HOST_X86) + libunwind::UnwindCursor uc(_addressSpace, regs); +#else + #error "Unwinding is not implemented for this architecture yet." 
+#endif + +#if _LIBUNWIND_SUPPORT_DWARF_UNWIND + bool retVal = uc.getInfoFromDwarfSection(pc, uwInfoSections, 0 /* fdeSectionOffsetHint */); + if (!retVal) + { + return false; + } + + unw_proc_info_t procInfo; + uc.getInfo(&procInfo); + +#if defined(TARGET_ARM64) + DwarfInstructions dwarfInst; + int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm64_rt*)regs); +#elif defined(TARGET_ARM) + DwarfInstructions dwarfInst; + int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm_rt*)regs); +#else + DwarfInstructions dwarfInst; + int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_REGDISPLAY*)regs); +#endif + + if (stepRet != UNW_STEP_SUCCESS) + { + return false; + } + + regs->pIP = PTR_PCODE(regs->SP - sizeof(TADDR)); +#elif defined(_LIBUNWIND_ARM_EHABI) + uc.setInfoBasedOnIPRegister(true); + int stepRet = uc.step(); + if ((stepRet != UNW_STEP_SUCCESS) && (stepRet != UNW_STEP_END)) + { + return false; + } +#endif + + return true; +} + +bool UnwindHelpers::StepFrame(REGDISPLAY *regs) +{ + UnwindInfoSections uwInfoSections; +#if _LIBUNWIND_SUPPORT_DWARF_UNWIND + uintptr_t pc = regs->GetIP(); + if (!_addressSpace.findUnwindSections(pc, uwInfoSections)) + { + return false; + } + return DoTheStep(pc, uwInfoSections, regs); +#elif defined(_LIBUNWIND_ARM_EHABI) + // unwind section is located later for ARM + // pc will be taked from regs parameter + return DoTheStep(0, uwInfoSections, regs); +#else + PORTABILITY_ASSERT("StepFrame"); +#endif +} diff --git a/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.h b/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.h new file mode 100644 index 0000000000000..86dadb71f4b82 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/UnwindHelpers.h @@ -0,0 +1,13 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +// This class is used to encapsulate the internals of our unwinding implementation +// and any custom versions of libunwind structures that we use for performance +// reasons. 
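+// Typical use (sketch): the stack walker fills a REGDISPLAY with the current frame's
+// IP/SP and callee-saved register locations, then calls StepFrame repeatedly; each
+// successful step rewrites the REGDISPLAY to describe the caller's frame, and a false
+// return means the frame could not be unwound (e.g. no unwind info for the current IP).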
+class UnwindHelpers +{ +public: + static bool StepFrame(REGDISPLAY *regs); +}; diff --git a/src/coreclr/src/nativeaot/Runtime/unix/config.h.in b/src/coreclr/src/nativeaot/Runtime/unix/config.h.in new file mode 100644 index 0000000000000..ca5f5aa7145c2 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/config.h.in @@ -0,0 +1,34 @@ +#ifndef _PAL_CONFIG_H_INCLUDED +#define _PAL_CONFIG_H_INCLUDED 1 + +#cmakedefine01 HAVE_SYS_VMPARAM_H +#cmakedefine01 HAVE_MACH_VM_TYPES_H +#cmakedefine01 HAVE_MACH_VM_PARAM_H + +#cmakedefine01 HAVE_PTHREAD_ATTR_GET_NP +#cmakedefine01 HAVE_PTHREAD_GETATTR_NP +#cmakedefine01 HAVE_PTHREAD_CONDATTR_SETCLOCK +#cmakedefine01 HAVE_PTHREAD_GETTHREADID_NP + +#cmakedefine01 HAVE_CLOCK_NANOSLEEP +#cmakedefine01 HAVE_SYSCTL +#cmakedefine01 HAVE_SYSCONF + +#cmakedefine01 HAVE_GREGSET_T +#cmakedefine01 HAVE___GREGSET_T + +#cmakedefine01 HAVE_SIGINFO_T +#cmakedefine01 HAVE_UCONTEXT_T + +#cmakedefine01 HAVE__SC_PHYS_PAGES +#cmakedefine01 HAVE__SC_AVPHYS_PAGES + +#cmakedefine01 HAVE_LWP_SELF +#cmakedefine01 HAVE_SCHED_GETCPU +#cmakedefine01 HAVE_CLOCK_MONOTONIC +#cmakedefine01 HAVE_CLOCK_MONOTONIC_COARSE +#cmakedefine01 HAVE_MACH_ABSOLUTE_TIME + +#cmakedefine01 HAVE_THREAD_LOCAL + +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/unix/configure.cmake b/src/coreclr/src/nativeaot/Runtime/unix/configure.cmake new file mode 100644 index 0000000000000..a8fd29d182cda --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/configure.cmake @@ -0,0 +1,117 @@ +include(CheckCXXSourceCompiles) +include(CheckCXXSourceRuns) +include(CheckCXXSymbolExists) +include(CheckFunctionExists) +include(CheckIncludeFiles) +include(CheckStructHasMember) +include(CheckTypeSize) +include(CheckLibraryExists) + +if(CMAKE_SYSTEM_NAME STREQUAL FreeBSD) + set(CMAKE_REQUIRED_INCLUDES /usr/local/include) +elseif(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin) + set(CMAKE_REQUIRED_DEFINITIONS "-D_BSD_SOURCE -D_SVID_SOURCE -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200809L") +endif() + +list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_FILE_OFFSET_BITS=64) + +check_include_files(sys/vmparam.h HAVE_SYS_VMPARAM_H) +check_include_files(mach/vm_types.h HAVE_MACH_VM_TYPES_H) +check_include_files(mach/vm_param.h HAVE_MACH_VM_PARAM_H) + +check_library_exists(pthread pthread_attr_get_np "" HAVE_PTHREAD_ATTR_GET_NP) +check_library_exists(pthread pthread_getattr_np "" HAVE_PTHREAD_GETATTR_NP) +check_library_exists(pthread pthread_condattr_setclock "" HAVE_PTHREAD_CONDATTR_SETCLOCK) +check_library_exists(pthread pthread_getthreadid_np "" HAVE_PTHREAD_GETTHREADID_NP) + +check_function_exists(clock_nanosleep HAVE_CLOCK_NANOSLEEP) +check_function_exists(sysctl HAVE_SYSCTL) +check_function_exists(sysconf HAVE_SYSCONF) + +check_struct_has_member ("ucontext_t" uc_mcontext.gregs[0] ucontext.h HAVE_GREGSET_T) +check_struct_has_member ("ucontext_t" uc_mcontext.__gregs[0] ucontext.h HAVE___GREGSET_T) + +set(CMAKE_EXTRA_INCLUDE_FILES) +set(CMAKE_EXTRA_INCLUDE_FILES signal.h) +check_type_size(siginfo_t SIGINFO_T) +set(CMAKE_EXTRA_INCLUDE_FILES) +set(CMAKE_EXTRA_INCLUDE_FILES ucontext.h) +check_type_size(ucontext_t UCONTEXT_T) + +check_cxx_symbol_exists(_SC_PHYS_PAGES unistd.h HAVE__SC_PHYS_PAGES) +check_cxx_symbol_exists(_SC_AVPHYS_PAGES unistd.h HAVE__SC_AVPHYS_PAGES) + +check_cxx_source_compiles(" +#include + +int main(int argc, char **argv) +{ + return (int)_lwp_self(); +}" HAVE_LWP_SELF) + +set(CMAKE_REQUIRED_LIBRARIES pthread) +check_cxx_source_runs(" +#include +#include + +int main(void) +{ + if (sched_getcpu() >= 0) + { + exit(0); + } + 
exit(1); +}" HAVE_SCHED_GETCPU) +set(CMAKE_REQUIRED_LIBRARIES) + +check_cxx_source_runs(" +#include +#include +#include + +int main() +{ + int ret; + struct timespec ts; + ret = clock_gettime(CLOCK_MONOTONIC, &ts); + + exit(ret); +}" HAVE_CLOCK_MONOTONIC) + +check_cxx_source_runs(" +#include +#include +#include + +int main() +{ + int ret; + struct timespec ts; + ret = clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); + + exit(ret); +}" HAVE_CLOCK_MONOTONIC_COARSE) + +check_cxx_source_runs(" +#include +#include + +int main() +{ + int ret; + mach_timebase_info_data_t timebaseInfo; + ret = mach_timebase_info(&timebaseInfo); + mach_absolute_time(); + exit(ret); +}" HAVE_MACH_ABSOLUTE_TIME) + +check_cxx_source_compiles(" +thread_local int x; + +int main(int argc, char **argv) +{ + x = 1; + return 0; +}" HAVE_THREAD_LOCAL) + +configure_file(${CMAKE_CURRENT_LIST_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) diff --git a/src/coreclr/src/nativeaot/Runtime/unix/no_sal2.h b/src/coreclr/src/nativeaot/Runtime/unix/no_sal2.h new file mode 100644 index 0000000000000..9c4dff666be1c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/no_sal2.h @@ -0,0 +1,533 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + + +/*** +* no_sal2.h - renders the SAL annotations for documenting APIs harmless. +* + +* +*Purpose: +* sal.h provides a set of SAL2 annotations to describe how a function uses its +* parameters - the assumptions it makes about them, and the guarantees it makes +* upon finishing. This file redefines all those annotation macros to be harmless. +* It is designed for use in down-level build environments where the tooling may +* be unhappy with the standard SAL2 macro definitions. 
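+*
+*    For example (an illustrative sketch, not part of the original header): a
+*    hypothetical declaration written against the SAL2 macros, such as
+*
+*        _Check_return_ _Success_(return == 0)
+*        int ReadValue(_In_z_ const char* pszName, _Out_ int* pValue);
+*
+*    still compiles as the plain prototype
+*
+*        int ReadValue(const char* pszName, int* pValue);
+*
+*    because every annotation used above is expanded to nothing by this header.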
+* +* [Public] +* + +* +****/ + +#ifndef _NO_SAL_2_H_ +#define _NO_SAL_2_H_ + +#undef _When_ +#define _When_(c,a) +#undef _At_ +#define _At_(t,a) +#undef _At_buffer_ +#define _At_buffer_(t,i,c,a) +#undef _Group_ +#define _Group_(a) +#undef _Pre_ +#define _Pre_ +#undef _Post_ +#define _Post_ +#undef _Deref_ +#define _Deref_ +#undef _Null_ +#define _Null_ +#undef _Notnull_ +#define _Notnull_ +#undef _Maybenull_ +#define _Maybenull_ +#undef _Const_ +#define _Const_ +#undef _Check_return_ +#define _Check_return_ +#undef _Must_inspect_result_ +#define _Must_inspect_result_ +#undef _Pre_satisfies_ +#define _Pre_satisfies_(e) +#undef _Post_satisfies_ +#define _Post_satisfies_(e) +#undef _Writable_elements_ +#define _Writable_elements_(s) +#undef _Writable_bytes_ +#define _Writable_bytes_(s) +#undef _Readable_elements_ +#define _Readable_elements_(s) +#undef _Readable_bytes_ +#define _Readable_bytes_(s) +#undef _Null_terminated_ +#define _Null_terminated_ +#undef _NullNull_terminated_ +#define _NullNull_terminated_ +#undef _Valid_ +#define _Valid_ +#undef _Notvalid_ +#define _Notvalid_ +#undef _Success_ +#define _Success_(c) +#undef _Return_type_success_ +#define _Return_type_success_(c) +#undef _On_failure_ +#define _On_failure_(a) +#undef _Always_ +#define _Always_(a) +#undef _Use_decl_annotations_ +#define _Use_decl_annotations_ +#undef _Pre_defensive_ +#define _Pre_defensive_ +#undef _Post_defensive_ +#define _Post_defensive_ +#undef _Pre_unknown_ +#define _Pre_unknown_ +#undef _Acquires_lock_ +#define _Acquires_lock_(e) +#undef _Releases_lock_ +#define _Releases_lock_(e) +#undef _Requires_lock_held_ +#define _Requires_lock_held_(e) +#undef _Requires_lock_not_held_ +#define _Requires_lock_not_held_(e) +#undef _Requires_no_locks_held_ +#define _Requires_no_locks_held_ +#undef _Guarded_by_ +#define _Guarded_by_(e) +#undef _Write_guarded_by_ +#define _Write_guarded_by_(e) +#undef _Interlocked_ +#define _Interlocked_ +#undef _Post_same_lock_ +#define _Post_same_lock_(e1,e2) +#undef _Benign_race_begin_ +#define _Benign_race_begin_ +#undef _Benign_race_end_ +#define _Benign_race_end_ +#undef _No_competing_thread_ +#define _No_competing_thread_ +#undef _No_competing_thread_begin_ +#define _No_competing_thread_begin_ +#undef _No_competing_thread_end_ +#define _No_competing_thread_end_ +#undef _Acquires_shared_lock_ +#define _Acquires_shared_lock_(e) +#undef _Releases_shared_lock_ +#define _Releases_shared_lock_(e) +#undef _Requires_shared_lock_held_ +#define _Requires_shared_lock_held_(e) +#undef _Acquires_exclusive_lock_ +#define _Acquires_exclusive_lock_(e) +#undef _Releases_exclusive_lock_ +#define _Releases_exclusive_lock_(e) +#undef _Requires_exclusive_lock_held_ +#define _Requires_exclusive_lock_held_(e) +#undef _Has_lock_kind_ +#define _Has_lock_kind_(n) +#undef _Create_lock_level_ +#define _Create_lock_level_(n) +#undef _Has_lock_level_ +#define _Has_lock_level_(n) +#undef _Lock_level_order_ +#define _Lock_level_order_(n1,n2) +#undef _Analysis_assume_lock_acquired_ +#define _Analysis_assume_lock_acquired_(e) +#undef _Analysis_assume_lock_released_ +#define _Analysis_assume_lock_released_(e) +#undef _Analysis_assume_lock_held_ +#define _Analysis_assume_lock_held_(e) +#undef _Analysis_assume_lock_not_held_ +#define _Analysis_assume_lock_not_held_(e) +#undef _Analysis_assume_same_lock_ +#define _Analysis_assume_same_lock_(e) +#undef _In_ +#define _In_ +#undef _Out_ +#define _Out_ +#undef _Inout_ +#define _Inout_ +#undef _In_z_ +#define _In_z_ +#undef _Inout_z_ +#define _Inout_z_ +#undef 
_In_reads_ +#define _In_reads_(s) +#undef _In_reads_bytes_ +#define _In_reads_bytes_(s) +#undef _In_reads_z_ +#define _In_reads_z_(s) +#undef _In_reads_or_z_ +#define _In_reads_or_z_(s) +#undef _Out_writes_ +#define _Out_writes_(s) +#undef _Out_writes_bytes_ +#define _Out_writes_bytes_(s) +#undef _Out_writes_z_ +#define _Out_writes_z_(s) +#undef _Inout_updates_ +#define _Inout_updates_(s) +#undef _Inout_updates_bytes_ +#define _Inout_updates_bytes_(s) +#undef _Inout_updates_z_ +#define _Inout_updates_z_(s) +#undef _Out_writes_to_ +#define _Out_writes_to_(s,c) +#undef _Out_writes_bytes_to_ +#define _Out_writes_bytes_to_(s,c) +#undef _Out_writes_all_ +#define _Out_writes_all_(s) +#undef _Out_writes_bytes_all_ +#define _Out_writes_bytes_all_(s) +#undef _Inout_updates_to_ +#define _Inout_updates_to_(s,c) +#undef _Inout_updates_bytes_to_ +#define _Inout_updates_bytes_to_(s,c) +#undef _Inout_updates_all_ +#define _Inout_updates_all_(s) +#undef _Inout_updates_bytes_all_ +#define _Inout_updates_bytes_all_(s) +#undef _In_reads_to_ptr_ +#define _In_reads_to_ptr_(p) +#undef _In_reads_to_ptr_z_ +#define _In_reads_to_ptr_z_(p) +#undef _Out_writes_to_ptr_ +#define _Out_writes_to_ptr_(p) +#undef _Out_writes_to_ptr_z_ +#define _Out_writes_to_ptr_z_(p) +#undef _In_opt_ +#define _In_opt_ +#undef _Out_opt_ +#define _Out_opt_ +#undef _Inout_opt_ +#define _Inout_opt_ +#undef _In_opt_z_ +#define _In_opt_z_ +#undef _Inout_opt_z_ +#define _Inout_opt_z_ +#undef _In_reads_opt_ +#define _In_reads_opt_(s) +#undef _In_reads_bytes_opt_ +#define _In_reads_bytes_opt_(s) +#undef _Out_writes_opt_ +#define _Out_writes_opt_(s) +#undef _Out_writes_bytes_opt_ +#define _Out_writes_bytes_opt_(s) +#undef _Out_writes_opt_z_ +#define _Out_writes_opt_z_(s) +#undef _Inout_updates_opt_ +#define _Inout_updates_opt_(s) +#undef _Inout_updates_bytes_opt_ +#define _Inout_updates_bytes_opt_(s) +#undef _Inout_updates_opt_z_ +#define _Inout_updates_opt_z_(s) +#undef _Out_writes_to_opt_ +#define _Out_writes_to_opt_(s,c) +#undef _Out_writes_bytes_to_opt_ +#define _Out_writes_bytes_to_opt_(s,c) +#undef _Out_writes_all_opt_ +#define _Out_writes_all_opt_(s) +#undef _Out_writes_bytes_all_opt_ +#define _Out_writes_bytes_all_opt_(s) +#undef _Inout_updates_to_opt_ +#define _Inout_updates_to_opt_(s,c) +#undef _Inout_updates_bytes_to_opt_ +#define _Inout_updates_bytes_to_opt_(s,c) +#undef _Inout_updates_all_opt_ +#define _Inout_updates_all_opt_(s) +#undef _Inout_updates_bytes_all_opt_ +#define _Inout_updates_bytes_all_opt_(s) +#undef _In_reads_to_ptr_opt_ +#define _In_reads_to_ptr_opt_(p) +#undef _In_reads_to_ptr_opt_z_ +#define _In_reads_to_ptr_opt_z_(p) +#undef _Out_writes_to_ptr_opt_ +#define _Out_writes_to_ptr_opt_(p) +#undef _Out_writes_to_ptr_opt_z_ +#define _Out_writes_to_ptr_opt_z_(p) +#undef _Outptr_ +#define _Outptr_ +#undef _Outptr_opt_ +#define _Outptr_opt_ +#undef _Outptr_result_maybenull_ +#define _Outptr_result_maybenull_ +#undef _Outptr_opt_result_maybenull_ +#define _Outptr_opt_result_maybenull_ +#undef _Outptr_z_ +#define _Outptr_z_ +#undef _Outptr_opt_z_ +#define _Outptr_opt_z_ +#undef _Outptr_result_maybenull_z_ +#define _Outptr_result_maybenull_z_ +#undef _Outptr_opt_result_maybenull_z_ +#define _Outptr_opt_result_maybenull_z_ +#undef _COM_Outptr_ +#define _COM_Outptr_ +#undef _COM_Outptr_opt_ +#define _COM_Outptr_opt_ +#undef _COM_Outptr_result_maybenull_ +#define _COM_Outptr_result_maybenull_ +#undef _COM_Outptr_opt_result_maybenull_ +#define _COM_Outptr_opt_result_maybenull_ +#undef _Outptr_result_buffer_ +#define 
_Outptr_result_buffer_(s) +#undef _Outptr_result_bytebuffer_ +#define _Outptr_result_bytebuffer_(s) +#undef _Outptr_opt_result_buffer_ +#define _Outptr_opt_result_buffer_(s) +#undef _Outptr_opt_result_bytebuffer_ +#define _Outptr_opt_result_bytebuffer_(s) +#undef _Outptr_result_buffer_to_ +#define _Outptr_result_buffer_to_(s,c) +#undef _Outptr_result_bytebuffer_to_ +#define _Outptr_result_bytebuffer_to_(s,c) +#undef _Outptr_opt_result_buffer_to_ +#define _Outptr_opt_result_buffer_to_(s,c) +#undef _Outptr_opt_result_bytebuffer_to_ +#define _Outptr_opt_result_bytebuffer_to_(s,c) +#undef _Ret_ +#define _Ret_ +#undef _Ret_valid_ +#define _Ret_valid_ +#undef _Ret_z_ +#define _Ret_z_ +#undef _Ret_writes_ +#define _Ret_writes_(s) +#undef _Ret_writes_bytes_ +#define _Ret_writes_bytes_(s) +#undef _Ret_writes_z_ +#define _Ret_writes_z_(s) +#undef _Ret_writes_to_ +#define _Ret_writes_to_(s,c) +#undef _Ret_writes_bytes_to_ +#define _Ret_writes_bytes_to_(s,c) +#undef _Ret_writes_maybenull_ +#define _Ret_writes_maybenull_(s) +#undef _Ret_writes_bytes_maybenull_ +#define _Ret_writes_bytes_maybenull_(s) +#undef _Ret_writes_to_maybenull_ +#define _Ret_writes_to_maybenull_(s,c) +#undef _Ret_writes_bytes_to_maybenull_ +#define _Ret_writes_bytes_to_maybenull_(s,c) +#undef _Ret_writes_maybenull_z_ +#define _Ret_writes_maybenull_z_(s) +#undef _Ret_maybenull_ +#define _Ret_maybenull_ +#undef _Ret_maybenull_z_ +#define _Ret_maybenull_z_ +#undef _Field_size_ +#define _Field_size_(s) +#undef _Field_size_opt_ +#define _Field_size_opt_(s) +#undef _Field_size_bytes_ +#define _Field_size_bytes_(s) +#undef _Field_size_bytes_opt_ +#define _Field_size_bytes_opt_(s) +#undef _Field_size_part_ +#define _Field_size_part_(s,c) +#undef _Field_size_part_opt_ +#define _Field_size_part_opt_(s,c) +#undef _Field_size_bytes_part_ +#define _Field_size_bytes_part_(s,c) +#undef _Field_size_bytes_part_opt_ +#define _Field_size_bytes_part_opt_(s,c) +#undef _Field_size_full_ +#define _Field_size_full_(s) +#undef _Field_size_full_opt_ +#define _Field_size_full_opt_(s) +#undef _Field_size_bytes_full_ +#define _Field_size_bytes_full_(s) +#undef _Field_size_bytes_full_opt_ +#define _Field_size_bytes_full_opt_(s) +#undef _Printf_format_string_ +#define _Printf_format_string_ +#undef _Scanf_format_string_ +#define _Scanf_format_string_ +#undef _Scanf_s_format_string_ +#define _Scanf_s_format_string_ +#undef _Printf_format_string_params_ +#define _Printf_format_string_params_(x) +#undef _Scanf_format_string_params_ +#define _Scanf_format_string_params_(x) +#undef _Scanf_s_format_string_params_ +#define _Scanf_s_format_string_params_(x) +#undef _In_range_ +#define _In_range_(l,h) +#undef _Out_range_ +#define _Out_range_(l,h) +#undef _Ret_range_ +#define _Ret_range_(l,h) +#undef _Deref_in_range_ +#define _Deref_in_range_(l,h) +#undef _Deref_out_range_ +#define _Deref_out_range_(l,h) +#undef _Deref_inout_range_ +#define _Deref_inout_range_(l,h) +#undef _Field_range_ +#define _Field_range_(l,h) +#undef _Pre_equal_to_ +#define _Pre_equal_to_(e) +#undef _Post_equal_to_ +#define _Post_equal_to_(e) +#undef _Struct_size_bytes_ +#define _Struct_size_bytes_(s) +#undef _Analysis_assume_ +#define _Analysis_assume_ +#undef _Analysis_mode_ +#define _Analysis_mode_(m) +#undef _Analysis_noreturn_ +#define _Analysis_noreturn_ +#undef _Raises_SEH_exception_ +#define _Raises_SEH_exception_ +#undef _Maybe_raises_SEH_exception_ +#define _Maybe_raises_SEH_exception_ +#undef _Function_class_ +#define _Function_class_(n) +#undef _Literal_ +#define _Literal_ +#undef 
_Notliteral_ +#define _Notliteral_ +#undef _Enum_is_bitflag_ +#define _Enum_is_bitflag_ +#undef _Strict_type_match_ +#define _Strict_type_match_ +#undef _Points_to_data_ +#define _Points_to_data_ +#undef _Interlocked_operand_ +#define _Interlocked_operand_ +#undef _IRQL_raises_ +#define _IRQL_raises_(i) +#undef _IRQL_requires_ +#define _IRQL_requires_(i) +#undef _IRQL_requires_max_ +#define _IRQL_requires_max_(i) +#undef _IRQL_requires_min_ +#define _IRQL_requires_min_(i) +#undef _IRQL_saves_ +#define _IRQL_saves_ +#undef _IRQL_saves_global_ +#define _IRQL_saves_global_(k,s) +#undef _IRQL_restores_ +#define _IRQL_restores_ +#undef _IRQL_restores_global_ +#define _IRQL_restores_global_(k,s) +#undef _IRQL_always_function_min_ +#define _IRQL_always_function_min_(i) +#undef _IRQL_always_function_max_ +#define _IRQL_always_function_max_(i) +#undef _IRQL_requires_same_ +#define _IRQL_requires_same_ +#undef _IRQL_uses_cancel_ +#define _IRQL_uses_cancel_ +#undef _IRQL_is_cancel_ +#define _IRQL_is_cancel_ +#undef _Kernel_float_saved_ +#define _Kernel_float_saved_ +#undef _Kernel_float_restored_ +#define _Kernel_float_restored_ +#undef _Kernel_float_used_ +#define _Kernel_float_used_ +#undef _Kernel_acquires_resource_ +#define _Kernel_acquires_resource_(k) +#undef _Kernel_releases_resource_ +#define _Kernel_releases_resource_(k) +#undef _Kernel_requires_resource_held_ +#define _Kernel_requires_resource_held_(k) +#undef _Kernel_requires_resource_not_held_ +#define _Kernel_requires_resource_not_held_(k) +#undef _Kernel_clear_do_init_ +#define _Kernel_clear_do_init_(yn) +#undef _Kernel_IoGetDmaAdapter_ +#define _Kernel_IoGetDmaAdapter_ +#undef _Outref_ +#define _Outref_ +#undef _Outref_result_maybenull_ +#define _Outref_result_maybenull_ +#undef _Outref_result_buffer_ +#define _Outref_result_buffer_(s) +#undef _Outref_result_bytebuffer_ +#define _Outref_result_bytebuffer_(s) +#undef _Outref_result_buffer_to_ +#define _Outref_result_buffer_to_(s,c) +#undef _Outref_result_bytebuffer_to_ +#define _Outref_result_bytebuffer_to_(s,c) +#undef _Outref_result_buffer_all_ +#define _Outref_result_buffer_all_(s) +#undef _Outref_result_bytebuffer_all_ +#define _Outref_result_bytebuffer_all_(s) +#undef _Outref_result_buffer_maybenull_ +#define _Outref_result_buffer_maybenull_(s) +#undef _Outref_result_bytebuffer_maybenull_ +#define _Outref_result_bytebuffer_maybenull_(s) +#undef _Outref_result_buffer_to_maybenull_ +#define _Outref_result_buffer_to_maybenull_(s,c) +#undef _Outref_result_bytebuffer_to_maybenull_ +#define _Outref_result_bytebuffer_to_maybenull_(s,c) +#undef _Outref_result_buffer_all_maybenull_ +#define _Outref_result_buffer_all_maybenull_(s) +#undef _Outref_result_bytebuffer_all_maybenull_ +#define _Outref_result_bytebuffer_all_maybenull_(s) +#undef _In_defensive_ +#define _In_defensive_(a) +#undef _Out_defensive_ +#define _Out_defensive_(a) +#undef _Inout_defensive_ +#define _Inout_defensive_(a) +#undef _Outptr_result_nullonfailure_ +#define _Outptr_result_nullonfailure_ +#undef _Outptr_opt_result_nullonfailure_ +#define _Outptr_opt_result_nullonfailure_ +#undef _Outref_result_nullonfailure_ +#define _Outref_result_nullonfailure_ +#undef _Result_nullonfailure_ +#define _Result_nullonfailure_ +#undef _Result_zeroonfailure_ +#define _Result_zeroonfailure_ +#undef _Acquires_nonreentrant_lock_ +#define _Acquires_nonreentrant_lock_(e) +#undef _Releases_nonreentrant_lock_ +#define _Releases_nonreentrant_lock_(e) +#undef _Reserved_ +#define _Reserved_ _Pre_equal_to_(0) _Pre_ _Null_ +#undef _Pre_z_ +#define 
_Pre_z_ _Pre_ _Null_terminated_ +#undef _Post_z_ +#define _Post_z_ _Post_ _Null_terminated_ +#undef _Prepost_z_ +#define _Prepost_z_ _Pre_z_ _Post_z_ +#undef _Pre_null_ +#define _Pre_null_ _Pre_ _Null_ +#undef _Pre_maybenull_ +#define _Pre_maybenull_ _Pre_ _Maybenull_ +#undef _Pre_notnull_ +#define _Pre_notnull_ _Pre_ _Notnull_ +#undef _Pre_valid_ +#define _Pre_valid_ _Pre_notnull_ _Pre_ _Valid_ +#undef _Pre_opt_valid_ +#define _Pre_opt_valid_ _Pre_maybenull_ _Pre_ _Valid_ +#undef _Post_valid_ +#define _Post_valid_ _Post_ _Valid_ +#undef _Post_invalid_ +#define _Post_invalid_ _Post_ _Deref_ _Notvalid_ +#undef _Post_ptr_invalid_ +#define _Post_ptr_invalid_ _Post_ _Notvalid_ +#undef _Pre_readable_size_ +#define _Pre_readable_size_(s) _Pre_ _Readable_elements_(s) _Pre_ _Valid_ +#undef _Pre_writable_size_ +#define _Pre_writable_size_(s) _Pre_ _Writable_elements_(s) +#undef _Pre_readable_byte_size_ +#define _Pre_readable_byte_size_(s) _Pre_ _Readable_bytes_(s) _Pre_ _Valid_ +#undef _Pre_writable_byte_size_ +#define _Pre_writable_byte_size_(s) _Pre_ _Writable_bytes_(s) +#undef _Post_readable_size_ +#define _Post_readable_size_(s) _Post_ _Readable_elements_(s) _Post_ _Valid_ +#undef _Post_writable_size_ +#define _Post_writable_size_(s) _Post_ _Writable_elements_(s) +#undef _Post_readable_byte_size_ +#define _Post_readable_byte_size_(s) _Post_ _Readable_bytes_(s) _Post_ _Valid_ +#undef _Post_writable_byte_size_ +#define _Post_writable_byte_size_(s) _Post_ _Writable_bytes_(s) + +#endif /* _NO_SAL_2_H_ */ diff --git a/src/coreclr/src/nativeaot/Runtime/unix/poppack.h b/src/coreclr/src/nativeaot/Runtime/unix/poppack.h new file mode 100644 index 0000000000000..b5c2319b54171 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/poppack.h @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +// +// =========================================================================== +// File: poppack.h +// +// =========================================================================== +/* +Abstract: + + This file turns packing of structures off. (That is, it enables + automatic alignment of structure fields.) An include file is needed + because various compilers do this in different ways. + + poppack.h is the complement to pshpack?.h. An inclusion of poppack.h + MUST ALWAYS be preceded by an inclusion of one of pshpack?.h, in one-to-one + correspondence. + + For Microsoft compatible compilers, this file uses the pop option + to the pack pragma so that it can restore the previous saved by the + pshpack?.h include file. + +*/ + +#if ! (defined(lint) || defined(RC_INVOKED)) +#if ( _MSC_VER >= 800 && !defined(_M_I86)) || defined(_PUSHPOP_SUPPORTED) +#pragma warning(disable:4103) +#if !(defined( MIDL_PASS )) || defined( __midl ) +#pragma pack(pop) +#else +#pragma pack() +#endif +#else +#pragma pack() +#endif +#endif // ! (defined(lint) || defined(RC_INVOKED)) + diff --git a/src/coreclr/src/nativeaot/Runtime/unix/pshpack1.h b/src/coreclr/src/nativeaot/Runtime/unix/pshpack1.h new file mode 100644 index 0000000000000..5f1e59e285db8 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/pshpack1.h @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
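+//
+// Typical usage of the pshpack*.h / poppack.h pair (an illustrative sketch,
+// not part of the original headers; the structure shown is hypothetical):
+//
+//     #include "pshpack1.h"
+//     struct PacketHeader
+//     {
+//         uint8_t  kind;
+//         uint32_t length;   // packed at offset 1; sizeof(PacketHeader) == 5
+//     };
+//     #include "poppack.h"   // restores the previous packing
+//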
+// + +// +// =========================================================================== +// File: pshpack1.h +// +// =========================================================================== + +/*++ + +Abstract: + + This file turns 1 byte packing of structures on. (That is, it disables + automatic alignment of structure fields.) An include file is needed + because various compilers do this in different ways. For Microsoft + compatible compilers, this files uses the push option to the pack pragma + so that the poppack.h include file can restore the previous packing + reliably. + + The file poppack.h is the complement to this file. + +--*/ + +#if ! (defined(lint) || defined(RC_INVOKED)) +#if ( _MSC_VER >= 800 && !defined(_M_I86)) || defined(_PUSHPOP_SUPPORTED) +#pragma warning(disable:4103) +#if !(defined( MIDL_PASS )) || defined( __midl ) +#pragma pack(push,1) +#else +#pragma pack(1) +#endif +#else +#pragma pack(1) +#endif +#endif // ! (defined(lint) || defined(RC_INVOKED)) + diff --git a/src/coreclr/src/nativeaot/Runtime/unix/pshpack4.h b/src/coreclr/src/nativeaot/Runtime/unix/pshpack4.h new file mode 100644 index 0000000000000..7d4de930c2dff --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/pshpack4.h @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +// +// =========================================================================== +// File: pshpack4.h +// +// =========================================================================== + +/*++ + +Abstract: + + This file turns 4 byte packing of structures on. (That is, it disables + automatic alignment of structure fields.) An include file is needed + because various compilers do this in different ways. For Microsoft + compatible compilers, this files uses the push option to the pack pragma + so that the poppack.h include file can restore the previous packing + reliably. + + The file poppack.h is the complement to this file. + +--*/ + +#if ! (defined(lint) || defined(RC_INVOKED)) +#if ( _MSC_VER >= 800 && !defined(_M_I86)) || defined(_PUSHPOP_SUPPORTED) +#pragma warning(disable:4103) +#if !(defined( MIDL_PASS )) || defined( __midl ) +#pragma pack(push,4) +#else +#pragma pack(4) +#endif +#else +#pragma pack(4) +#endif +#endif // ! (defined(lint) || defined(RC_INVOKED)) + diff --git a/src/coreclr/src/nativeaot/Runtime/unix/sal.h b/src/coreclr/src/nativeaot/Runtime/unix/sal.h new file mode 100644 index 0000000000000..e651378912826 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/sal.h @@ -0,0 +1,2957 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*** +*sal.h - markers for documenting the semantics of APIs +* + +* +*Purpose: +* sal.h provides a set of annotations to describe how a function uses its +* parameters - the assumptions it makes about them, and the guarantees it makes +* upon finishing. +****/ +#pragma once + +/*========================================================================== + + The comments in this file are intended to give basic understanding of + the usage of SAL, the Microsoft Source Code Annotation Language. 
+ For more details, please see http://go.microsoft.com/fwlink/?LinkID=242134 + + The macros are defined in 3 layers, plus the structural set: + + _In_/_Out_/_Ret_ Layer: + ---------------------- + This layer provides the highest abstraction and its macros should be used + in most cases. These macros typically start with: + _In_ : input parameter to a function, unmodified by called function + _Out_ : output parameter, written to by called function, pointed-to + location not expected to be initialized prior to call + _Outptr_ : like _Out_ when returned variable is a pointer type + (so param is pointer-to-pointer type). Called function + provides/allocated space. + _Outref_ : like _Outptr_, except param is reference-to-pointer type. + _Inout_ : inout parameter, read from and potentially modified by + called function. + _Ret_ : for return values + _Field_ : class/struct field invariants + For common usage, this class of SAL provides the most concise annotations. + Note that _In_/_Out_/_Inout_/_Outptr_ annotations are designed to be used + with a parameter target. Using them with _At_ to specify non-parameter + targets may yield unexpected results. + + This layer also includes a number of other properties that can be specified + to extend the ability of code analysis, most notably: + -- Designating parameters as format strings for printf/scanf/scanf_s + -- Requesting stricter type checking for C enum parameters + + _Pre_/_Post_ Layer: + ------------------ + The macros of this layer only should be used when there is no suitable macro + in the _In_/_Out_ layer. Its macros start with _Pre_ or _Post_. + This layer provides the most flexibility for annotations. + + Implementation Abstraction Layer: + -------------------------------- + Macros from this layer should never be used directly. The layer only exists + to hide the implementation of the annotation macros. + + Structural Layer: + ---------------- + These annotations, like _At_ and _When_, are used with annotations from + any of the other layers as modifiers, indicating exactly when and where + the annotations apply. + + + Common syntactic conventions: + ---------------------------- + + Usage: + ----- + _In_, _Out_, _Inout_, _Pre_, _Post_, are for formal parameters. + _Ret_, _Deref_ret_ must be used for return values. + + Nullness: + -------- + If the parameter can be NULL as a precondition to the function, the + annotation contains _opt. If the macro does not contain '_opt' the + parameter cannot be NULL. + + If an out/inout parameter returns a null pointer as a postcondition, this is + indicated by _Ret_maybenull_ or _result_maybenull_. If the macro is not + of this form, then the result will not be NULL as a postcondition. + _Outptr_ - output value is not NULL + _Outptr_result_maybenull_ - output value might be NULL + + String Type: + ----------- + _z: NullTerminated string + for _In_ parameters the buffer must have the specified stringtype before the call + for _Out_ parameters the buffer must have the specified stringtype after the call + for _Inout_ parameters both conditions apply + + Extent Syntax: + ------------- + Buffer sizes are expressed as element counts, unless the macro explicitly + contains _byte_ or _bytes_. Some annotations specify two buffer sizes, in + which case the second is used to indicate how much of the buffer is valid + as a postcondition. 
This table outlines the precondition buffer allocation + size, precondition number of valid elements, postcondition allocation size, + and postcondition number of valid elements for representative buffer size + annotations: + Pre | Pre | Post | Post + alloc | valid | alloc | valid + Annotation elems | elems | elems | elems + ---------- ------------------------------------ + _In_reads_(s) s | s | s | s + _Inout_updates_(s) s | s | s | s + _Inout_updates_to_(s,c) s | s | s | c + _Out_writes_(s) s | 0 | s | s + _Out_writes_to_(s,c) s | 0 | s | c + _Outptr_result_buffer_(s) ? | ? | s | s + _Outptr_result_buffer_to_(s,c) ? | ? | s | c + + For the _Outptr_ annotations, the buffer in question is at one level of + dereference. The called function is responsible for supplying the buffer. + + Success and failure: + ------------------- + The SAL concept of success allows functions to define expressions that can + be tested by the caller, which if it evaluates to non-zero, indicates the + function succeeded, which means that its postconditions are guaranteed to + hold. Otherwise, if the expression evaluates to zero, the function is + considered to have failed, and the postconditions are not guaranteed. + + The success criteria can be specified with the _Success_(expr) annotation: + _Success_(return != FALSE) BOOL + PathCanonicalizeA(_Out_writes_(MAX_PATH) LPSTR pszBuf, LPCSTR pszPath) : + pszBuf is only guaranteed to be NULL-terminated when TRUE is returned, + and FALSE indiates failure. In common practice, callers check for zero + vs. non-zero returns, so it is preferable to express the success + criteria in terms of zero/non-zero, not checked for exactly TRUE. + + Functions can specify that some postconditions will still hold, even when + the function fails, using _On_failure_(anno-list), or postconditions that + hold regardless of success or failure using _Always_(anno-list). + + The annotation _Return_type_success_(expr) may be used with a typedef to + give a default _Success_ criteria to all functions returning that type. + This is the case for common Windows API status types, including + HRESULT and NTSTATUS. This may be overridden on a per-function basis by + specifying a _Success_ annotation locally. + +============================================================================*/ + +#define __ATTR_SAL + +#ifndef _SAL_VERSION /*IFSTRIP=IGN*/ +#define _SAL_VERSION 20 +#endif + +#ifdef _PREFAST_ // [ + +// choose attribute or __declspec implementation +#ifndef _USE_DECLSPECS_FOR_SAL // [ +#define _USE_DECLSPECS_FOR_SAL 1 +#endif // ] + +#if _USE_DECLSPECS_FOR_SAL // [ +#undef _USE_ATTRIBUTES_FOR_SAL +#define _USE_ATTRIBUTES_FOR_SAL 0 +#elif !defined(_USE_ATTRIBUTES_FOR_SAL) // ][ +#if _MSC_VER >= 1400 /*IFSTRIP=IGN*/ // [ +#define _USE_ATTRIBUTES_FOR_SAL 1 +#else // ][ +#define _USE_ATTRIBUTES_FOR_SAL 0 +#endif // ] +#endif // ] + + +#if !_USE_DECLSPECS_FOR_SAL // [ +#if !_USE_ATTRIBUTES_FOR_SAL // [ +#if _MSC_VER >= 1400 /*IFSTRIP=IGN*/ // [ +#undef _USE_ATTRIBUTES_FOR_SAL +#define _USE_ATTRIBUTES_FOR_SAL 1 +#else // ][ +#undef _USE_DECLSPECS_FOR_SAL +#define _USE_DECLSPECS_FOR_SAL 1 +#endif // ] +#endif // ] +#endif // ] + +#else + +// Disable expansion of SAL macros in non-Prefast mode to +// improve compiler throughput. 
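+// As an illustrative sketch (not part of the original header), a hypothetical
+// function annotated with the macros summarized above might be declared as:
+//
+//     _Success_(return == 0)
+//     int FormatName(_In_z_ const wchar_t* pszFirst,
+//                    _Out_writes_to_(cchBuf, *pcchWritten) wchar_t* pszBuf,
+//                    _In_ size_t cchBuf,
+//                    _Out_ size_t* pcchWritten);
+//
+// When _PREFAST_ is defined the annotations expand to attributes or __declspec
+// markers that the analyzer checks; otherwise the block below disables their
+// expansion entirely.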
+#ifndef _USE_DECLSPECS_FOR_SAL // [ +#define _USE_DECLSPECS_FOR_SAL 0 +#endif // ] +#ifndef _USE_ATTRIBUTES_FOR_SAL // [ +#define _USE_ATTRIBUTES_FOR_SAL 0 +#endif // ] + +#endif // ] + +// safeguard for MIDL and RC builds +#if _USE_DECLSPECS_FOR_SAL && ( defined( MIDL_PASS ) || defined(__midl) || defined(RC_INVOKED) || !defined(_PREFAST_) ) /*IFSTRIP=IGN*/ // [ +#undef _USE_DECLSPECS_FOR_SAL +#define _USE_DECLSPECS_FOR_SAL 0 +#endif // ] +#if _USE_ATTRIBUTES_FOR_SAL && ( !defined(_MSC_EXTENSIONS) || defined( MIDL_PASS ) || defined(__midl) || defined(RC_INVOKED) ) /*IFSTRIP=IGN*/ // [ +#undef _USE_ATTRIBUTES_FOR_SAL +#define _USE_ATTRIBUTES_FOR_SAL 0 +#endif // ] + +#if _USE_DECLSPECS_FOR_SAL || _USE_ATTRIBUTES_FOR_SAL + +// Special enum type for Y/N/M +enum __SAL_YesNo {_SAL_notpresent, _SAL_no, _SAL_maybe, _SAL_yes, _SAL_default}; + +#endif + +#if defined(BUILD_WINDOWS) && !_USE_ATTRIBUTES_FOR_SAL /*IFSTRIP=IGN*/ +#define _SAL1_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1") _GrouP_(annotes _SAL_nop_impl_) +#define _SAL1_1_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1.1") _GrouP_(annotes _SAL_nop_impl_) +#define _SAL1_2_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1.2") _GrouP_(annotes _SAL_nop_impl_) +#define _SAL2_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "2") _GrouP_(annotes _SAL_nop_impl_) +#else +#define _SAL1_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1") _Group_(annotes _SAL_nop_impl_) +#define _SAL1_1_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1.1") _Group_(annotes _SAL_nop_impl_) +#define _SAL1_2_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "1.2") _Group_(annotes _SAL_nop_impl_) +#define _SAL2_Source_(Name, args, annotes) _SA_annotes3(SAL_name, #Name, "", "2") _Group_(annotes _SAL_nop_impl_) +#endif + +//============================================================================ +// Structural SAL: +// These annotations modify the use of other annotations. They may +// express the annotation target (i.e. what parameter/field the annotation +// applies to) or the condition under which the annotation is applicable. +//============================================================================ + +// _At_(target, annos) specifies that the annotations listed in 'annos' is to +// be applied to 'target' rather than to the identifier which is the current +// lexical target. +#define _At_(target, annos) _At_impl_(target, annos _SAL_nop_impl_) + +// _At_buffer_(target, iter, bound, annos) is similar to _At_, except that +// target names a buffer, and each annotation in annos is applied to each +// element of target up to bound, with the variable named in iter usable +// by the annotations to refer to relevant offsets within target. +#define _At_buffer_(target, iter, bound, annos) _At_buffer_impl_(target, iter, bound, annos _SAL_nop_impl_) + +// _When_(expr, annos) specifies that the annotations listed in 'annos' only +// apply when 'expr' evaluates to non-zero. 
+#define _When_(expr, annos) _When_impl_(expr, annos _SAL_nop_impl_) +#define _Group_(annos) _Group_impl_(annos _SAL_nop_impl_) +#define _GrouP_(annos) _GrouP_impl_(annos _SAL_nop_impl_) + +// indicates whether normal post conditions apply to a function +#define _Success_(expr) _SAL2_Source_(_Success_, (expr), _Success_impl_(expr)) + +// indicates whether post conditions apply to a function returning +// the type that this annotation is applied to +#define _Return_type_success_(expr) _SAL2_Source_(_Return_type_success_, (expr), _Success_impl_(expr)) + +// Establish postconditions that apply only if the function does not succeed +#define _On_failure_(annos) _On_failure_impl_(annos _SAL_nop_impl_) + +// Establish postconditions that apply in both success and failure cases. +// Only applicable with functions that have _Success_ or _Return_type_succss_. +#define _Always_(annos) _Always_impl_(annos _SAL_nop_impl_) + +// Usable on a function defintion. Asserts that a function declaration is +// in scope, and its annotations are to be used. There are no other annotations +// allowed on the function definition. +#define _Use_decl_annotations_ _Use_decl_anno_impl_ + +// _Notref_ may precede a _Deref_ or "real" annotation, and removes one +// level of dereference if the parameter is a C++ reference (&). If the +// net deref on a "real" annotation is negative, it is simply discarded. +#define _Notref_ _Notref_impl_ + +// Annotations for defensive programming styles. +#define _Pre_defensive_ _SA_annotes0(SAL_pre_defensive) +#define _Post_defensive_ _SA_annotes0(SAL_post_defensive) + +#define _In_defensive_(annotes) _Pre_defensive_ _Group_(annotes) +#define _Out_defensive_(annotes) _Post_defensive_ _Group_(annotes) +#define _Inout_defensive_(annotes) _Pre_defensive_ _Post_defensive_ _Group_(annotes) + +//============================================================================ +// _In_\_Out_ Layer: +//============================================================================ + +// Reserved pointer parameters, must always be NULL. +#define _Reserved_ _SAL2_Source_(_Reserved_, (), _Pre1_impl_(__null_impl)) + +// _Const_ allows specification that any namable memory location is considered +// readonly for a given call. +#define _Const_ _SAL2_Source_(_Const_, (), _Pre1_impl_(__readaccess_impl_notref)) + + +// Input parameters -------------------------- + +// _In_ - Annotations for parameters where data is passed into the function, but not modified. +// _In_ by itself can be used with non-pointer types (although it is redundant). + +// e.g. void SetPoint( _In_ const POINT* pPT ); +#define _In_ _SAL2_Source_(_In_, (), _Pre1_impl_(__notnull_impl_notref) _Pre_valid_impl_ _Deref_pre1_impl_(__readaccess_impl_notref)) +#define _In_opt_ _SAL2_Source_(_In_opt_, (), _Pre1_impl_(__maybenull_impl_notref) _Pre_valid_impl_ _Deref_pre_readonly_) + +// nullterminated 'in' parameters. +// e.g. 
void CopyStr( _In_z_ const char* szFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo ); +#define _In_z_ _SAL2_Source_(_In_z_, (), _In_ _Pre1_impl_(__zterm_impl)) +#define _In_opt_z_ _SAL2_Source_(_In_opt_z_, (), _In_opt_ _Pre1_impl_(__zterm_impl)) + + +// 'input' buffers with given size + +#define _In_reads_(size) _SAL2_Source_(_In_reads_, (size), _Pre_count_(size) _Deref_pre_readonly_) +#define _In_reads_opt_(size) _SAL2_Source_(_In_reads_opt_, (size), _Pre_opt_count_(size) _Deref_pre_readonly_) +#define _In_reads_bytes_(size) _SAL2_Source_(_In_reads_bytes_, (size), _Pre_bytecount_(size) _Deref_pre_readonly_) +#define _In_reads_bytes_opt_(size) _SAL2_Source_(_In_reads_bytes_opt_, (size), _Pre_opt_bytecount_(size) _Deref_pre_readonly_) +#define _In_reads_z_(size) _SAL2_Source_(_In_reads_z_, (size), _In_reads_(size) _Pre_z_) +#define _In_reads_opt_z_(size) _SAL2_Source_(_In_reads_opt_z_, (size), _Pre_opt_count_(size) _Deref_pre_readonly_ _Pre_opt_z_) +#define _In_reads_or_z_(size) _SAL2_Source_(_In_reads_or_z_, (size), _In_ _When_(_String_length_(_Curr_) < (size), _Pre_z_) _When_(_String_length_(_Curr_) >= (size), _Pre1_impl_(__count_impl(size)))) +#define _In_reads_or_z_opt_(size) _SAL2_Source_(_In_reads_or_z_opt_, (size), _In_opt_ _When_(_String_length_(_Curr_) < (size), _Pre_z_) _When_(_String_length_(_Curr_) >= (size), _Pre1_impl_(__count_impl(size)))) + + +// 'input' buffers valid to the given end pointer + +#define _In_reads_to_ptr_(ptr) _SAL2_Source_(_In_reads_to_ptr_, (ptr), _Pre_ptrdiff_count_(ptr) _Deref_pre_readonly_) +#define _In_reads_to_ptr_opt_(ptr) _SAL2_Source_(_In_reads_to_ptr_opt_, (ptr), _Pre_opt_ptrdiff_count_(ptr) _Deref_pre_readonly_) +#define _In_reads_to_ptr_z_(ptr) _SAL2_Source_(_In_reads_to_ptr_z_, (ptr), _In_reads_to_ptr_(ptr) _Pre_z_) +#define _In_reads_to_ptr_opt_z_(ptr) _SAL2_Source_(_In_reads_to_ptr_opt_z_, (ptr), _Pre_opt_ptrdiff_count_(ptr) _Deref_pre_readonly_ _Pre_opt_z_) + + + +// Output parameters -------------------------- + +// _Out_ - Annotations for pointer or reference parameters where data passed back to the caller. +// These are mostly used where the pointer/reference is to a non-pointer type. +// _Outptr_/_Outref) (see below) are typically used to return pointers via parameters. + +// e.g. 
void GetPoint( _Out_ POINT* pPT ); +#define _Out_ _SAL2_Source_(_Out_, (), _Out_impl_) +#define _Out_opt_ _SAL2_Source_(_Out_opt_, (), _Out_opt_impl_) + +#define _Out_writes_(size) _SAL2_Source_(_Out_writes_, (size), _Pre_cap_(size) _Post_valid_impl_) +#define _Out_writes_opt_(size) _SAL2_Source_(_Out_writes_opt_, (size), _Pre_opt_cap_(size) _Post_valid_impl_) +#define _Out_writes_bytes_(size) _SAL2_Source_(_Out_writes_bytes_, (size), _Pre_bytecap_(size) _Post_valid_impl_) +#define _Out_writes_bytes_opt_(size) _SAL2_Source_(_Out_writes_bytes_opt_, (size), _Pre_opt_bytecap_(size) _Post_valid_impl_) +#define _Out_writes_z_(size) _SAL2_Source_(_Out_writes_z_, (size), _Pre_cap_(size) _Post_valid_impl_ _Post_z_) +#define _Out_writes_opt_z_(size) _SAL2_Source_(_Out_writes_opt_z_, (size), _Pre_opt_cap_(size) _Post_valid_impl_ _Post_z_) + +#define _Out_writes_to_(size,count) _SAL2_Source_(_Out_writes_to_, (size,count), _Pre_cap_(size) _Post_valid_impl_ _Post_count_(count)) +#define _Out_writes_to_opt_(size,count) _SAL2_Source_(_Out_writes_to_opt_, (size,count), _Pre_opt_cap_(size) _Post_valid_impl_ _Post_count_(count)) +#define _Out_writes_all_(size) _SAL2_Source_(_Out_writes_all_, (size), _Out_writes_to_(_Old_(size), _Old_(size))) +#define _Out_writes_all_opt_(size) _SAL2_Source_(_Out_writes_all_opt_, (size), _Out_writes_to_opt_(_Old_(size), _Old_(size))) + +#define _Out_writes_bytes_to_(size,count) _SAL2_Source_(_Out_writes_bytes_to_, (size,count), _Pre_bytecap_(size) _Post_valid_impl_ _Post_bytecount_(count)) +#define _Out_writes_bytes_to_opt_(size,count) _SAL2_Source_(_Out_writes_bytes_to_opt_, (size,count), _Pre_opt_bytecap_(size) _Post_valid_impl_ _Post_bytecount_(count)) +#define _Out_writes_bytes_all_(size) _SAL2_Source_(_Out_writes_bytes_all_, (size), _Out_writes_bytes_to_(_Old_(size), _Old_(size))) +#define _Out_writes_bytes_all_opt_(size) _SAL2_Source_(_Out_writes_bytes_all_opt_, (size), _Out_writes_bytes_to_opt_(_Old_(size), _Old_(size))) + +#define _Out_writes_to_ptr_(ptr) _SAL2_Source_(_Out_writes_to_ptr_, (ptr), _Pre_ptrdiff_cap_(ptr) _Post_valid_impl_) +#define _Out_writes_to_ptr_opt_(ptr) _SAL2_Source_(_Out_writes_to_ptr_opt_, (ptr), _Pre_opt_ptrdiff_cap_(ptr) _Post_valid_impl_) +#define _Out_writes_to_ptr_z_(ptr) _SAL2_Source_(_Out_writes_to_ptr_z_, (ptr), _Pre_ptrdiff_cap_(ptr) _Post_valid_impl_ Post_z_) +#define _Out_writes_to_ptr_opt_z_(ptr) _SAL2_Source_(_Out_writes_to_ptr_opt_z_, (ptr), _Pre_opt_ptrdiff_cap_(ptr) _Post_valid_impl_ Post_z_) + + +// Inout parameters ---------------------------- + +// _Inout_ - Annotations for pointer or reference parameters where data is passed in and +// potentially modified. 
+// void ModifyPoint( _Inout_ POINT* pPT ); +// void ModifyPointByRef( _Inout_ POINT& pPT ); + +#define _Inout_ _SAL2_Source_(_Inout_, (), _Prepost_valid_) +#define _Inout_opt_ _SAL2_Source_(_Inout_opt_, (), _Prepost_opt_valid_) + +// For modifying string buffers +// void toupper( _Inout_z_ char* sz ); +#define _Inout_z_ _SAL2_Source_(_Inout_z_, (), _Prepost_z_) +#define _Inout_opt_z_ _SAL2_Source_(_Inout_opt_z_, (), _Prepost_opt_z_) + +// For modifying buffers with explicit element size +#define _Inout_updates_(size) _SAL2_Source_(_Inout_updates_, (size), _Pre_cap_(size) _Pre_valid_impl_ _Post_valid_impl_) +#define _Inout_updates_opt_(size) _SAL2_Source_(_Inout_updates_opt_, (size), _Pre_opt_cap_(size) _Pre_valid_impl_ _Post_valid_impl_) +#define _Inout_updates_z_(size) _SAL2_Source_(_Inout_updates_z_, (size), _Pre_cap_(size) _Pre_valid_impl_ _Post_valid_impl_ _Pre1_impl_(__zterm_impl) _Post1_impl_(__zterm_impl)) +#define _Inout_updates_opt_z_(size) _SAL2_Source_(_Inout_updates_opt_z_, (size), _Pre_opt_cap_(size) _Pre_valid_impl_ _Post_valid_impl_ _Pre1_impl_(__zterm_impl) _Post1_impl_(__zterm_impl)) + +#define _Inout_updates_to_(size,count) _SAL2_Source_(_Inout_updates_to_, (size,count), _Out_writes_to_(size,count) _Pre_valid_impl_ _Pre1_impl_(__count_impl(count))) +#define _Inout_updates_to_opt_(size,count) _SAL2_Source_(_Inout_updates_to_opt_, (size,count), _Out_writes_to_opt_(size,count) _Pre_valid_impl_ _Pre1_impl_(__count_impl(count))) + +#define _Inout_updates_all_(size) _SAL2_Source_(_Inout_updates_all_, (size), _Inout_updates_to_(_Old_(size), _Old_(size))) +#define _Inout_updates_all_opt_(size) _SAL2_Source_(_Inout_updates_all_opt_, (size), _Inout_updates_to_opt_(_Old_(size), _Old_(size))) + +// For modifying buffers with explicit byte size +#define _Inout_updates_bytes_(size) _SAL2_Source_(_Inout_updates_bytes_, (size), _Pre_bytecap_(size) _Pre_valid_impl_ _Post_valid_impl_) +#define _Inout_updates_bytes_opt_(size) _SAL2_Source_(_Inout_updates_bytes_opt_, (size), _Pre_opt_bytecap_(size) _Pre_valid_impl_ _Post_valid_impl_) + +#define _Inout_updates_bytes_to_(size,count) _SAL2_Source_(_Inout_updates_bytes_to_, (size,count), _Out_writes_bytes_to_(size,count) _Pre_valid_impl_ _Pre1_impl_(__bytecount_impl(count))) +#define _Inout_updates_bytes_to_opt_(size,count) _SAL2_Source_(_Inout_updates_bytes_to_opt_, (size,count), _Out_writes_bytes_to_opt_(size,count) _Pre_valid_impl_ _Pre1_impl_(__bytecount_impl(count))) + +#define _Inout_updates_bytes_all_(size) _SAL2_Source_(_Inout_updates_bytes_all_, (size), _Inout_updates_bytes_to_(_Old_(size), _Old_(size))) +#define _Inout_updates_bytes_all_opt_(size) _SAL2_Source_(_Inout_updates_bytes_all_opt_, (size), _Inout_updates_bytes_to_opt_(_Old_(size), _Old_(size))) + + +// Pointer to pointer parameters ------------------------- + +// _Outptr_ - Annotations for output params returning pointers +// These describe parameters where the called function provides the buffer: +// HRESULT SHStrDupW(_In_ LPCWSTR psz, _Outptr_ LPWSTR *ppwsz); +// The caller passes the address of an LPWSTR variable as ppwsz, and SHStrDupW allocates +// and initializes memory and returns the pointer to the new LPWSTR in *ppwsz. +// +// _Outptr_opt_ - describes parameters that are allowed to be NULL. +// _Outptr_*_result_maybenull_ - describes parameters where the called function might return NULL to the caller. 
+// +// Example: +// void MyFunc(_Outptr_opt_ int **ppData1, _Outptr_result_maybenull_ int **ppData2); +// Callers: +// MyFunc(NULL, NULL); // error: parameter 2, ppData2, should not be NULL +// MyFunc(&pData1, &pData2); // ok: both non-NULL +// if (*pData1 == *pData2) ... // error: pData2 might be NULL after call + +#define _Outptr_ _SAL2_Source_(_Outptr_, (), _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(1))) +#define _Outptr_result_maybenull_ _SAL2_Source_(_Outptr_result_maybenull_, (), _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(1))) +#define _Outptr_opt_ _SAL2_Source_(_Outptr_opt_, (), _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(1))) +#define _Outptr_opt_result_maybenull_ _SAL2_Source_(_Outptr_opt_result_maybenull_, (), _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(1))) + +// Annotations for _Outptr_ parameters returning pointers to null terminated strings. + +#define _Outptr_result_z_ _SAL2_Source_(_Outptr_result_z_, (), _Out_impl_ _Deref_post_z_) +#define _Outptr_opt_result_z_ _SAL2_Source_(_Outptr_opt_result_z_, (), _Out_opt_impl_ _Deref_post_z_) +#define _Outptr_result_maybenull_z_ _SAL2_Source_(_Outptr_result_maybenull_z_, (), _Out_impl_ _Deref_post_opt_z_) +#define _Outptr_opt_result_maybenull_z_ _SAL2_Source_(_Outptr_opt_result_maybenull_z_, (), _Out_opt_impl_ _Deref_post_opt_z_) + +// Annotations for _Outptr_ parameters where the output pointer is set to NULL if the function fails. + +#define _Outptr_result_nullonfailure_ _SAL2_Source_(_Outptr_result_nullonfailure_, (), _Outptr_ _On_failure_(_Deref_post_null_)) +#define _Outptr_opt_result_nullonfailure_ _SAL2_Source_(_Outptr_opt_result_nullonfailure_, (), _Outptr_opt_ _On_failure_(_Deref_post_null_)) + +// Annotations for _Outptr_ parameters which return a pointer to a ref-counted COM object, +// following the COM convention of setting the output to NULL on failure. +// The current implementation is identical to _Outptr_result_nullonfailure_. +// For pointers to types that are not COM objects, _Outptr_result_nullonfailure_ is preferred. 
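+// e.g. (an illustrative sketch, not part of the original header) a hypothetical
+// COM-style factory following the convention described above:
+//
+//     _Check_return_
+//     HRESULT CreateWidget(_In_ REFIID riid, _COM_Outptr_ void** ppvWidget);
+//
+// On failure the callee must set *ppvWidget to NULL, which is exactly the
+// _On_failure_(_Deref_post_null_) postcondition carried by _COM_Outptr_.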
+ +#define _COM_Outptr_ _SAL2_Source_(_COM_Outptr_, (), _Outptr_ _On_failure_(_Deref_post_null_)) +#define _COM_Outptr_result_maybenull_ _SAL2_Source_(_COM_Outptr_result_maybenull_, (), _Outptr_result_maybenull_ _On_failure_(_Deref_post_null_)) +#define _COM_Outptr_opt_ _SAL2_Source_(_COM_Outptr_opt_, (), _Outptr_opt_ _On_failure_(_Deref_post_null_)) +#define _COM_Outptr_opt_result_maybenull_ _SAL2_Source_(_COM_Outptr_opt_result_maybenull_, (), _Outptr_opt_result_maybenull_ _On_failure_(_Deref_post_null_)) + +// Annotations for _Outptr_ parameters returning a pointer to buffer with a specified number of elements/bytes + +#define _Outptr_result_buffer_(size) _SAL2_Source_(_Outptr_result_buffer_, (size), _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __cap_impl(size))) +#define _Outptr_opt_result_buffer_(size) _SAL2_Source_(_Outptr_opt_result_buffer_, (size), _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __cap_impl(size))) +#define _Outptr_result_buffer_to_(size, count) _SAL2_Source_(_Outptr_result_buffer_to_, (size, count), _Out_impl_ _Deref_post3_impl_(__notnull_impl_notref, __cap_impl(size), __count_impl(count))) +#define _Outptr_opt_result_buffer_to_(size, count) _SAL2_Source_(_Outptr_opt_result_buffer_to_, (size, count), _Out_opt_impl_ _Deref_post3_impl_(__notnull_impl_notref, __cap_impl(size), __count_impl(count))) + +#define _Outptr_result_buffer_all_(size) _SAL2_Source_(_Outptr_result_buffer_all_, (size), _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(size))) +#define _Outptr_opt_result_buffer_all_(size) _SAL2_Source_(_Outptr_opt_result_buffer_all_, (size), _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(size))) + +#define _Outptr_result_buffer_maybenull_(size) _SAL2_Source_(_Outptr_result_buffer_maybenull_, (size), _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __cap_impl(size))) +#define _Outptr_opt_result_buffer_maybenull_(size) _SAL2_Source_(_Outptr_opt_result_buffer_maybenull_, (size), _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __cap_impl(size))) +#define _Outptr_result_buffer_to_maybenull_(size, count) _SAL2_Source_(_Outptr_result_buffer_to_maybenull_, (size, count), _Out_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __cap_impl(size), __count_impl(count))) +#define _Outptr_opt_result_buffer_to_maybenull_(size, count) _SAL2_Source_(_Outptr_opt_result_buffer_to_maybenull_, (size, count), _Out_opt_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __cap_impl(size), __count_impl(count))) + +#define _Outptr_result_buffer_all_maybenull_(size) _SAL2_Source_(_Outptr_result_buffer_all_maybenull_, (size), _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(size))) +#define _Outptr_opt_result_buffer_all_maybenull_(size) _SAL2_Source_(_Outptr_opt_result_buffer_all_maybenull_, (size), _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(size))) + +#define _Outptr_result_bytebuffer_(size) _SAL2_Source_(_Outptr_result_bytebuffer_, (size), _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecap_impl(size))) +#define _Outptr_opt_result_bytebuffer_(size) _SAL2_Source_(_Outptr_opt_result_bytebuffer_, (size), _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecap_impl(size))) +#define _Outptr_result_bytebuffer_to_(size, count) _SAL2_Source_(_Outptr_result_bytebuffer_to_, (size, count), _Out_impl_ _Deref_post3_impl_(__notnull_impl_notref, __bytecap_impl(size), __bytecount_impl(count))) +#define _Outptr_opt_result_bytebuffer_to_(size, count) 
_SAL2_Source_(_Outptr_opt_result_bytebuffer_to_, (size, count), _Out_opt_impl_ _Deref_post3_impl_(__notnull_impl_notref, __bytecap_impl(size), __bytecount_impl(count))) + +#define _Outptr_result_bytebuffer_all_(size) _SAL2_Source_(_Outptr_result_bytebuffer_all_, (size), _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecount_impl(size))) +#define _Outptr_opt_result_bytebuffer_all_(size) _SAL2_Source_(_Outptr_opt_result_bytebuffer_all_, (size), _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecount_impl(size))) + +#define _Outptr_result_bytebuffer_maybenull_(size) _SAL2_Source_(_Outptr_result_bytebuffer_maybenull_, (size), _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecap_impl(size))) +#define _Outptr_opt_result_bytebuffer_maybenull_(size) _SAL2_Source_(_Outptr_opt_result_bytebuffer_maybenull_, (size), _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecap_impl(size))) +#define _Outptr_result_bytebuffer_to_maybenull_(size, count) _SAL2_Source_(_Outptr_result_bytebuffer_to_maybenull_, (size, count), _Out_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __bytecap_impl(size), __bytecount_impl(count))) +#define _Outptr_opt_result_bytebuffer_to_maybenull_(size, count) _SAL2_Source_(_Outptr_opt_result_bytebuffer_to_maybenull_, (size, count), _Out_opt_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __bytecap_impl(size), __bytecount_impl(count))) + +#define _Outptr_result_bytebuffer_all_maybenull_(size) _SAL2_Source_(_Outptr_result_bytebuffer_all_maybenull_, (size), _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecount_impl(size))) +#define _Outptr_opt_result_bytebuffer_all_maybenull_(size) _SAL2_Source_(_Outptr_opt_result_bytebuffer_all_maybenull_, (size), _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecount_impl(size))) + +// Annotations for output reference to pointer parameters. 
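+// e.g. (an illustrative sketch, not part of the original header) a hypothetical
+// accessor using the reference-to-pointer forms defined just below:
+//
+//     void GetScratchBuffer(_Outref_result_buffer_(cb) unsigned char*& pBuffer,
+//                           _Out_ size_t& cb);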
+ +#define _Outref_ _SAL2_Source_(_Outref_, (), _Out_impl_ _Post_notnull_) +#define _Outref_result_maybenull_ _SAL2_Source_(_Outref_result_maybenull_, (), _Pre2_impl_(__notnull_impl_notref, __cap_c_one_notref_impl) _Post_maybenull_ _Post_valid_impl_) + +#define _Outref_result_buffer_(size) _SAL2_Source_(_Outref_result_buffer_, (size), _Outref_ _Post1_impl_(__cap_impl(size))) +#define _Outref_result_bytebuffer_(size) _SAL2_Source_(_Outref_result_bytebuffer_, (size), _Outref_ _Post1_impl_(__bytecap_impl(size))) +#define _Outref_result_buffer_to_(size, count) _SAL2_Source_(_Outref_result_buffer_to_, (size, count), _Outref_result_buffer_(size) _Post1_impl_(__count_impl(count))) +#define _Outref_result_bytebuffer_to_(size, count) _SAL2_Source_(_Outref_result_bytebuffer_to_, (size, count), _Outref_result_bytebuffer_(size) _Post1_impl_(__bytecount_impl(count))) +#define _Outref_result_buffer_all_(size) _SAL2_Source_(_Outref_result_buffer_all_, (size), _Outref_result_buffer_to_(size, _Old_(size))) +#define _Outref_result_bytebuffer_all_(size) _SAL2_Source_(_Outref_result_bytebuffer_all_, (size), _Outref_result_bytebuffer_to_(size, _Old_(size))) + +#define _Outref_result_buffer_maybenull_(size) _SAL2_Source_(_Outref_result_buffer_maybenull_, (size), _Outref_result_maybenull_ _Post1_impl_(__cap_impl(size))) +#define _Outref_result_bytebuffer_maybenull_(size) _SAL2_Source_(_Outref_result_bytebuffer_maybenull_, (size), _Outref_result_maybenull_ _Post1_impl_(__bytecap_impl(size))) +#define _Outref_result_buffer_to_maybenull_(size, count) _SAL2_Source_(_Outref_result_buffer_to_maybenull_, (size, count), _Outref_result_buffer_maybenull_(size) _Post1_impl_(__count_impl(count))) +#define _Outref_result_bytebuffer_to_maybenull_(size, count) _SAL2_Source_(_Outref_result_bytebuffer_to_maybenull_, (size, count), _Outref_result_bytebuffer_maybenull_(size) _Post1_impl_(__bytecount_impl(count))) +#define _Outref_result_buffer_all_maybenull_(size) _SAL2_Source_(_Outref_result_buffer_all_maybenull_, (size), _Outref_result_buffer_to_maybenull_(size, _Old_(size))) +#define _Outref_result_bytebuffer_all_maybenull_(size) _SAL2_Source_(_Outref_result_bytebuffer_all_maybenull_, (size), _Outref_result_bytebuffer_to_maybenull_(size, _Old_(size))) + +// Annotations for output reference to pointer parameters that guarantee +// that the pointer is set to NULL on failure. +#define _Outref_result_nullonfailure_ _SAL2_Source_(_Outref_result_nullonfailure_, (), _Outref_ _On_failure_(_Post_null_)) + +// Generic annotations to set output value of a by-pointer or by-reference parameter to null/zero on failure. +#define _Result_nullonfailure_ _SAL2_Source_(_Result_nullonfailure_, (), _On_failure_(_Notref_impl_ _Deref_impl_ _Post_null_)) +#define _Result_zeroonfailure_ _SAL2_Source_(_Result_zeroonfailure_, (), _On_failure_(_Notref_impl_ _Deref_impl_ _Out_range_(==, 0))) + + +// return values ------------------------------- + +// +// _Ret_ annotations +// +// describing conditions that hold for return values after the call + +// e.g. 
_Ret_z_ CString::operator const WCHAR*() const throw(); +#define _Ret_z_ _SAL2_Source_(_Ret_z_, (), _Ret2_impl_(__notnull_impl, __zterm_impl) _Ret_valid_impl_) +#define _Ret_maybenull_z_ _SAL2_Source_(_Ret_maybenull_z_, (), _Ret2_impl_(__maybenull_impl,__zterm_impl) _Ret_valid_impl_) + +// used with allocated but not yet initialized objects +#define _Ret_notnull_ _SAL2_Source_(_Ret_notnull_, (), _Ret1_impl_(__notnull_impl)) +#define _Ret_maybenull_ _SAL2_Source_(_Ret_maybenull_, (), _Ret1_impl_(__maybenull_impl)) +#define _Ret_null_ _SAL2_Source_(_Ret_null_, (), _Ret1_impl_(__null_impl)) + +// used with allocated and initialized objects +// returns single valid object +#define _Ret_valid_ _SAL2_Source_(_Ret_valid_, (), _Ret1_impl_(__notnull_impl_notref) _Ret_valid_impl_) + +// returns pointer to initialized buffer of specified size +#define _Ret_writes_(size) _SAL2_Source_(_Ret_writes_, (size), _Ret2_impl_(__notnull_impl, __count_impl(size)) _Ret_valid_impl_) +#define _Ret_writes_z_(size) _SAL2_Source_(_Ret_writes_z_, (size), _Ret3_impl_(__notnull_impl, __count_impl(size), __zterm_impl) _Ret_valid_impl_) +#define _Ret_writes_bytes_(size) _SAL2_Source_(_Ret_writes_bytes_, (size), _Ret2_impl_(__notnull_impl, __bytecount_impl(size)) _Ret_valid_impl_) +#define _Ret_writes_maybenull_(size) _SAL2_Source_(_Ret_writes_maybenull_, (size), _Ret2_impl_(__maybenull_impl,__count_impl(size)) _Ret_valid_impl_) +#define _Ret_writes_maybenull_z_(size) _SAL2_Source_(_Ret_writes_maybenull_z_, (size), _Ret3_impl_(__maybenull_impl,__count_impl(size),__zterm_impl) _Ret_valid_impl_) +#define _Ret_writes_bytes_maybenull_(size) _SAL2_Source_(_Ret_writes_bytes_maybenull_, (size), _Ret2_impl_(__maybenull_impl,__bytecount_impl(size)) _Ret_valid_impl_) + +// returns pointer to partially initialized buffer, with total size 'size' and initialized size 'count' +#define _Ret_writes_to_(size,count) _SAL2_Source_(_Ret_writes_to_, (size,count), _Ret3_impl_(__notnull_impl, __cap_impl(size), __count_impl(count)) _Ret_valid_impl_) +#define _Ret_writes_bytes_to_(size,count) _SAL2_Source_(_Ret_writes_bytes_to_, (size,count), _Ret3_impl_(__notnull_impl, __bytecap_impl(size), __bytecount_impl(count)) _Ret_valid_impl_) +#define _Ret_writes_to_maybenull_(size,count) _SAL2_Source_(_Ret_writes_to_maybenull_, (size,count), _Ret3_impl_(__maybenull_impl, __cap_impl(size), __count_impl(count)) _Ret_valid_impl_) +#define _Ret_writes_bytes_to_maybenull_(size,count) _SAL2_Source_(_Ret_writes_bytes_to_maybenull_, (size,count), _Ret3_impl_(__maybenull_impl, __bytecap_impl(size), __bytecount_impl(count)) _Ret_valid_impl_) + + +// Annotations for strict type checking +#define _Points_to_data_ _SAL2_Source_(_Points_to_data_, (), _Pre_ _Points_to_data_impl_) +#define _Literal_ _SAL2_Source_(_Literal_, (), _Pre_ _Literal_impl_) +#define _Notliteral_ _SAL2_Source_(_Notliteral_, (), _Pre_ _Notliteral_impl_) + +// Check the return value of a function e.g. _Check_return_ ErrorCode Foo(); +#define _Check_return_ _SAL2_Source_(_Check_return_, (), _Check_return_impl_) +#define _Must_inspect_result_ _SAL2_Source_(_Must_inspect_result_, (), _Must_inspect_impl_ _Check_return_impl_) + +// e.g. MyPrintF( _Printf_format_string_ const WCHAR* wzFormat, ... 
); +#define _Printf_format_string_ _SAL2_Source_(_Printf_format_string_, (), _Printf_format_string_impl_) +#define _Scanf_format_string_ _SAL2_Source_(_Scanf_format_string_, (), _Scanf_format_string_impl_) +#define _Scanf_s_format_string_ _SAL2_Source_(_Scanf_s_format_string_, (), _Scanf_s_format_string_impl_) + +#define _Format_string_impl_(kind,where) _SA_annotes2(SAL_IsFormatString2, kind, where) +#define _Printf_format_string_params_(x) _SAL2_Source_(_Printf_format_string_params_, (x), _Format_string_impl_("printf", x)) +#define _Scanf_format_string_params_(x) _SAL2_Source_(_Scanf_format_string_params_, (x), _Format_string_impl_("scanf", x)) +#define _Scanf_s_format_string_params_(x) _SAL2_Source_(_Scanf_s_format_string_params_, (x), _Format_string_impl_("scanf_s", x)) + +// annotations to express value of integral or pointer parameter +#define _In_range_(lb,ub) _SAL2_Source_(_In_range_, (lb,ub), _In_range_impl_(lb,ub)) +#define _Out_range_(lb,ub) _SAL2_Source_(_Out_range_, (lb,ub), _Out_range_impl_(lb,ub)) +#define _Ret_range_(lb,ub) _SAL2_Source_(_Ret_range_, (lb,ub), _Ret_range_impl_(lb,ub)) +#define _Deref_in_range_(lb,ub) _SAL2_Source_(_Deref_in_range_, (lb,ub), _Deref_in_range_impl_(lb,ub)) +#define _Deref_out_range_(lb,ub) _SAL2_Source_(_Deref_out_range_, (lb,ub), _Deref_out_range_impl_(lb,ub)) +#define _Deref_ret_range_(lb,ub) _SAL2_Source_(_Deref_ret_range_, (lb,ub), _Deref_ret_range_impl_(lb,ub)) +#define _Pre_equal_to_(expr) _SAL2_Source_(_Pre_equal_to_, (expr), _In_range_(==, expr)) +#define _Post_equal_to_(expr) _SAL2_Source_(_Post_equal_to_, (expr), _Out_range_(==, expr)) + +// annotation to express that a value (usually a field of a mutable class) +// is not changed by a function call +#define _Unchanged_(e) _SAL2_Source_(_Unchanged_, (e), _At_(e, _Post_equal_to_(_Old_(e)) _Const_)) + +// Annotations to allow expressing generalized pre and post conditions. +// 'cond' may be any valid SAL expression that is considered to be true as a precondition +// or postcondition (respectively). 
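+// A minimal usage sketch (GrowBuffer is a hypothetical function, shown only to illustrate the form): +// e.g. void GrowBuffer( size_t cbOld, _Pre_satisfies_(cbNew >= cbOld) size_t cbNew ); +// the condition is assumed to hold on entry; use _Post_satisfies_ for a condition that holds on return.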
+#define _Pre_satisfies_(cond) _SAL2_Source_(_Pre_satisfies_, (cond), _Pre_satisfies_impl_(cond)) +#define _Post_satisfies_(cond) _SAL2_Source_(_Post_satisfies_, (cond), _Post_satisfies_impl_(cond)) + +// Annotations to express struct, class and field invariants +#define _Struct_size_bytes_(size) _SAL2_Source_(_Struct_size_bytes_, (size), _Writable_bytes_(size)) + +#define _Field_size_(size) _SAL2_Source_(_Field_size_, (size), _Notnull_ _Writable_elements_(size)) +#define _Field_size_opt_(size) _SAL2_Source_(_Field_size_opt_, (size), _Maybenull_ _Writable_elements_(size)) +#define _Field_size_part_(size, count) _SAL2_Source_(_Field_size_part_, (size, count), _Notnull_ _Writable_elements_(size) _Readable_elements_(count)) +#define _Field_size_part_opt_(size, count) _SAL2_Source_(_Field_size_part_opt_, (size, count), _Maybenull_ _Writable_elements_(size) _Readable_elements_(count)) +#define _Field_size_full_(size) _SAL2_Source_(_Field_size_full_, (size), _Field_size_part_(size, size)) +#define _Field_size_full_opt_(size) _SAL2_Source_(_Field_size_full_opt_, (size), _Field_size_part_opt_(size, size)) + +#define _Field_size_bytes_(size) _SAL2_Source_(_Field_size_bytes_, (size), _Notnull_ _Writable_bytes_(size)) +#define _Field_size_bytes_opt_(size) _SAL2_Source_(_Field_size_bytes_opt_, (size), _Maybenull_ _Writable_bytes_(size)) +#define _Field_size_bytes_part_(size, count) _SAL2_Source_(_Field_size_bytes_part_, (size, count), _Notnull_ _Writable_bytes_(size) _Readable_bytes_(count)) +#define _Field_size_bytes_part_opt_(size, count) _SAL2_Source_(_Field_size_bytes_part_opt_, (size, count), _Maybenull_ _Writable_bytes_(size) _Readable_bytes_(count)) +#define _Field_size_bytes_full_(size) _SAL2_Source_(_Field_size_bytes_full_, (size), _Field_size_bytes_part_(size, size)) +#define _Field_size_bytes_full_opt_(size) _SAL2_Source_(_Field_size_bytes_full_opt_, (size), _Field_size_bytes_part_opt_(size, size)) + +#define _Field_z_ _SAL2_Source_(_Field_z_, (), _Null_terminated_) + +#define _Field_range_(min,max) _SAL2_Source_(_Field_range_, (min,max), _Field_range_impl_(min,max)) + +//============================================================================ +// _Pre_\_Post_ Layer: +//============================================================================ + +// +// Raw Pre/Post for declaring custom pre/post conditions +// + +#define _Pre_ _Pre_impl_ +#define _Post_ _Post_impl_ + +// +// Validity property +// + +#define _Valid_ _Valid_impl_ +#define _Notvalid_ _Notvalid_impl_ +#define _Maybevalid_ _Maybevalid_impl_ + +// +// Buffer size properties +// + +// Expressing buffer sizes without specifying pre or post condition +#define _Readable_bytes_(size) _SAL2_Source_(_Readable_bytes_, (size), _Readable_bytes_impl_(size)) +#define _Readable_elements_(size) _SAL2_Source_(_Readable_elements_, (size), _Readable_elements_impl_(size)) +#define _Writable_bytes_(size) _SAL2_Source_(_Writable_bytes_, (size), _Writable_bytes_impl_(size)) +#define _Writable_elements_(size) _SAL2_Source_(_Writable_elements_, (size), _Writable_elements_impl_(size)) + +#define _Null_terminated_ _SAL2_Source_(_Null_terminated_, (), _Null_terminated_impl_) +#define _NullNull_terminated_ _SAL2_Source_(_NullNull_terminated_, (), _NullNull_terminated_impl_) + +// Expressing buffer size as pre or post condition +#define _Pre_readable_size_(size) _SAL2_Source_(_Pre_readable_size_, (size), _Pre1_impl_(__count_impl(size)) _Pre_valid_impl_) +#define _Pre_writable_size_(size) _SAL2_Source_(_Pre_writable_size_, (size), 
_Pre1_impl_(__cap_impl(size))) +#define _Pre_readable_byte_size_(size) _SAL2_Source_(_Pre_readable_byte_size_, (size), _Pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) +#define _Pre_writable_byte_size_(size) _SAL2_Source_(_Pre_writable_byte_size_, (size), _Pre1_impl_(__bytecap_impl(size))) + +#define _Post_readable_size_(size) _SAL2_Source_(_Post_readable_size_, (size), _Post1_impl_(__count_impl(size)) _Post_valid_impl_) +#define _Post_writable_size_(size) _SAL2_Source_(_Post_writable_size_, (size), _Post1_impl_(__cap_impl(size))) +#define _Post_readable_byte_size_(size) _SAL2_Source_(_Post_readable_byte_size_, (size), _Post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) +#define _Post_writable_byte_size_(size) _SAL2_Source_(_Post_writable_byte_size_, (size), _Post1_impl_(__bytecap_impl(size))) + +// +// Pointer null-ness properties +// +#define _Null_ _Null_impl_ +#define _Notnull_ _Notnull_impl_ +#define _Maybenull_ _Maybenull_impl_ + +// +// _Pre_ annotations --- +// +// describing conditions that must be met before the call of the function + +// e.g. int strlen( _Pre_z_ const char* sz ); +// buffer is a zero terminated string +#define _Pre_z_ _SAL2_Source_(_Pre_z_, (), _Pre1_impl_(__zterm_impl) _Pre_valid_impl_) + +// valid size unknown or indicated by type (e.g.:LPSTR) +#define _Pre_valid_ _SAL2_Source_(_Pre_valid_, (), _Pre1_impl_(__notnull_impl_notref) _Pre_valid_impl_) +#define _Pre_opt_valid_ _SAL2_Source_(_Pre_opt_valid_, (), _Pre1_impl_(__maybenull_impl_notref) _Pre_valid_impl_) + +#define _Pre_invalid_ _SAL2_Source_(_Pre_invalid_, (), _Deref_pre1_impl_(__notvalid_impl)) + +// Overrides recursive valid when some field is not yet initialized when using _Inout_ +#define _Pre_unknown_ _SAL2_Source_(_Pre_unknown_, (), _Pre1_impl_(__maybevalid_impl)) + +// used with allocated but not yet initialized objects +#define _Pre_notnull_ _SAL2_Source_(_Pre_notnull_, (), _Pre1_impl_(__notnull_impl_notref)) +#define _Pre_maybenull_ _SAL2_Source_(_Pre_maybenull_, (), _Pre1_impl_(__maybenull_impl_notref)) +#define _Pre_null_ _SAL2_Source_(_Pre_null_, (), _Pre1_impl_(__null_impl_notref)) + +// +// _Post_ annotations --- +// +// describing conditions that hold after the function call + +// void CopyStr( _In_z_ const char* szFrom, _Pre_cap_(cchTo) _Post_z_ char* szTo, size_t cchTo ); +// buffer will be a zero-terminated string after the call +#define _Post_z_ _SAL2_Source_(_Post_z_, (), _Post1_impl_(__zterm_impl) _Post_valid_impl_) + +// e.g. HRESULT InitStruct( _Post_valid_ Struct* pobj ); +#define _Post_valid_ _SAL2_Source_(_Post_valid_, (), _Post_valid_impl_) +#define _Post_invalid_ _SAL2_Source_(_Post_invalid_, (), _Deref_post1_impl_(__notvalid_impl)) + +// e.g. void free( _Post_ptr_invalid_ void* pv ); +#define _Post_ptr_invalid_ _SAL2_Source_(_Post_ptr_invalid_, (), _Post1_impl_(__notvalid_impl)) + +// e.g. void ThrowExceptionIfNull( _Post_notnull_ const void* pv ); +#define _Post_notnull_ _SAL2_Source_(_Post_notnull_, (), _Post1_impl_(__notnull_impl)) + +// e.g. 
HRESULT GetObject(_Outptr_ _On_failure_(_At_(*p, _Post_null_)) T **p); +#define _Post_null_ _SAL2_Source_(_Post_null_, (), _Post1_impl_(__null_impl)) + +#define _Post_maybenull_ _SAL2_Source_(_Post_maybenull_, (), _Post1_impl_(__maybenull_impl)) + +#define _Prepost_z_ _SAL2_Source_(_Prepost_z_, (), _Pre_z_ _Post_z_) + + +// #pragma region Input Buffer SAL 1 compatibility macros + +/*========================================================================== + + This section contains definitions for macros defined for VS2010 and earlier. + Usage of these macros is still supported, but the SAL 2 macros defined above + are recommended instead. This comment block is retained to assist in + understanding SAL that still uses the older syntax. + + The macros are defined in 3 layers: + + _In_\_Out_ Layer: + ---------------- + This layer provides the highest abstraction and its macros should be used + in most cases. Its macros start with _In_, _Out_ or _Inout_. For the + typical case they provide the most concise annotations. + + _Pre_\_Post_ Layer: + ------------------ + The macros of this layer only should be used when there is no suitable macro + in the _In_\_Out_ layer. Its macros start with _Pre_, _Post_, _Ret_, + _Deref_pre_ _Deref_post_ and _Deref_ret_. This layer provides the most + flexibility for annotations. + + Implementation Abstraction Layer: + -------------------------------- + Macros from this layer should never be used directly. The layer only exists + to hide the implementation of the annotation macros. + + + Annotation Syntax: + |--------------|----------|----------------|-----------------------------| + | Usage | Nullness | ZeroTerminated | Extent | + |--------------|----------|----------------|-----------------------------| + | _In_ | <> | <> | <> | + | _Out_ | opt_ | z_ | [byte]cap_[c_|x_]( size ) | + | _Inout_ | | | [byte]count_[c_|x_]( size ) | + | _Deref_out_ | | | ptrdiff_cap_( ptr ) | + |--------------| | | ptrdiff_count_( ptr ) | + | _Ret_ | | | | + | _Deref_ret_ | | | | + |--------------| | | | + | _Pre_ | | | | + | _Post_ | | | | + | _Deref_pre_ | | | | + | _Deref_post_ | | | | + |--------------|----------|----------------|-----------------------------| + + Usage: + ----- + _In_, _Out_, _Inout_, _Pre_, _Post_, _Deref_pre_, _Deref_post_ are for + formal parameters. + _Ret_, _Deref_ret_ must be used for return values. + + Nullness: + -------- + If the pointer can be NULL the annotation contains _opt. If the macro + does not contain '_opt' the pointer may not be NULL. + + String Type: + ----------- + _z: NullTerminated string + for _In_ parameters the buffer must have the specified stringtype before the call + for _Out_ parameters the buffer must have the specified stringtype after the call + for _Inout_ parameters both conditions apply + + Extent Syntax: + |------|---------------|---------------| + | Unit | Writ\Readable | Argument Type | + |------|---------------|---------------| + | <> | cap_ | <> | + | byte | count_ | c_ | + | | | x_ | + |------|---------------|---------------| + + 'cap' (capacity) describes the writable size of the buffer and is typically used + with _Out_. The default unit is elements. Use 'bytecap' if the size is given in bytes + 'count' describes the readable size of the buffer and is typically used with _In_. + The default unit is elements. Use 'bytecount' if the size is given in bytes. + + Argument syntax for cap_, bytecap_, count_, bytecount_: + (|return)[+n] e.g. 
cch, return, cb+2 + + If the buffer size is a constant expression use the c_ postfix. + E.g. cap_c_(20), count_c_(MAX_PATH), bytecount_c_(16) + + If the buffer size is given by a limiting pointer use the ptrdiff_ versions + of the macros. + + If the buffer size is neither a parameter nor a constant expression use the x_ + postfix. e.g. bytecount_x_(num*size) x_ annotations accept any arbitrary string. + No analysis can be done for x_ annotations but they at least tell the tool that + the buffer has some sort of extent description. x_ annotations might be supported + by future compiler versions. + +============================================================================*/ + +// e.g. void SetCharRange( _In_count_(cch) const char* rgch, size_t cch ) +// valid buffer extent described by another parameter +#define _In_count_(size) _SAL1_1_Source_(_In_count_, (size), _Pre_count_(size) _Deref_pre_readonly_) +#define _In_opt_count_(size) _SAL1_1_Source_(_In_opt_count_, (size), _Pre_opt_count_(size) _Deref_pre_readonly_) +#define _In_bytecount_(size) _SAL1_1_Source_(_In_bytecount_, (size), _Pre_bytecount_(size) _Deref_pre_readonly_) +#define _In_opt_bytecount_(size) _SAL1_1_Source_(_In_opt_bytecount_, (size), _Pre_opt_bytecount_(size) _Deref_pre_readonly_) + +// valid buffer extent described by a constant expression +#define _In_count_c_(size) _SAL1_1_Source_(_In_count_c_, (size), _Pre_count_c_(size) _Deref_pre_readonly_) +#define _In_opt_count_c_(size) _SAL1_1_Source_(_In_opt_count_c_, (size), _Pre_opt_count_c_(size) _Deref_pre_readonly_) +#define _In_bytecount_c_(size) _SAL1_1_Source_(_In_bytecount_c_, (size), _Pre_bytecount_c_(size) _Deref_pre_readonly_) +#define _In_opt_bytecount_c_(size) _SAL1_1_Source_(_In_opt_bytecount_c_, (size), _Pre_opt_bytecount_c_(size) _Deref_pre_readonly_) + +// nullterminated 'input' buffers with given size + +// e.g. void SetCharRange( _In_count_(cch) const char* rgch, size_t cch ) +// nullterminated valid buffer extent described by another parameter +#define _In_z_count_(size) _SAL1_1_Source_(_In_z_count_, (size), _Pre_z_ _Pre_count_(size) _Deref_pre_readonly_) +#define _In_opt_z_count_(size) _SAL1_1_Source_(_In_opt_z_count_, (size), _Pre_opt_z_ _Pre_opt_count_(size) _Deref_pre_readonly_) +#define _In_z_bytecount_(size) _SAL1_1_Source_(_In_z_bytecount_, (size), _Pre_z_ _Pre_bytecount_(size) _Deref_pre_readonly_) +#define _In_opt_z_bytecount_(size) _SAL1_1_Source_(_In_opt_z_bytecount_, (size), _Pre_opt_z_ _Pre_opt_bytecount_(size) _Deref_pre_readonly_) + +// nullterminated valid buffer extent described by a constant expression +#define _In_z_count_c_(size) _SAL1_1_Source_(_In_z_count_c_, (size), _Pre_z_ _Pre_count_c_(size) _Deref_pre_readonly_) +#define _In_opt_z_count_c_(size) _SAL1_1_Source_(_In_opt_z_count_c_, (size), _Pre_opt_z_ _Pre_opt_count_c_(size) _Deref_pre_readonly_) +#define _In_z_bytecount_c_(size) _SAL1_1_Source_(_In_z_bytecount_c_, (size), _Pre_z_ _Pre_bytecount_c_(size) _Deref_pre_readonly_) +#define _In_opt_z_bytecount_c_(size) _SAL1_1_Source_(_In_opt_z_bytecount_c_, (size), _Pre_opt_z_ _Pre_opt_bytecount_c_(size) _Deref_pre_readonly_) + +// buffer capacity is described by another pointer +// e.g. 
void Foo( _In_ptrdiff_count_(pchMax) const char* pch, const char* pchMax ) { while( pch < pchMax ) pch++; } +#define _In_ptrdiff_count_(size) _SAL1_1_Source_(_In_ptrdiff_count_, (size), _Pre_ptrdiff_count_(size) _Deref_pre_readonly_) +#define _In_opt_ptrdiff_count_(size) _SAL1_1_Source_(_In_opt_ptrdiff_count_, (size), _Pre_opt_ptrdiff_count_(size) _Deref_pre_readonly_) + +// 'x' version for complex expressions that are not supported by the current compiler version +// e.g. void Set3ColMatrix( _In_count_x_(3*cRows) const Elem* matrix, int cRows ); +#define _In_count_x_(size) _SAL1_1_Source_(_In_count_x_, (size), _Pre_count_x_(size) _Deref_pre_readonly_) +#define _In_opt_count_x_(size) _SAL1_1_Source_(_In_opt_count_x_, (size), _Pre_opt_count_x_(size) _Deref_pre_readonly_) +#define _In_bytecount_x_(size) _SAL1_1_Source_(_In_bytecount_x_, (size), _Pre_bytecount_x_(size) _Deref_pre_readonly_) +#define _In_opt_bytecount_x_(size) _SAL1_1_Source_(_In_opt_bytecount_x_, (size), _Pre_opt_bytecount_x_(size) _Deref_pre_readonly_) + + +// 'out' with buffer size +// e.g. void GetIndices( _Out_cap_(cIndices) int* rgIndices, size_t cIndices ); +// buffer capacity is described by another parameter +#define _Out_cap_(size) _SAL1_1_Source_(_Out_cap_, (size), _Pre_cap_(size) _Post_valid_impl_) +#define _Out_opt_cap_(size) _SAL1_1_Source_(_Out_opt_cap_, (size), _Pre_opt_cap_(size) _Post_valid_impl_) +#define _Out_bytecap_(size) _SAL1_1_Source_(_Out_bytecap_, (size), _Pre_bytecap_(size) _Post_valid_impl_) +#define _Out_opt_bytecap_(size) _SAL1_1_Source_(_Out_opt_bytecap_, (size), _Pre_opt_bytecap_(size) _Post_valid_impl_) + +// buffer capacity is described by a constant expression +#define _Out_cap_c_(size) _SAL1_1_Source_(_Out_cap_c_, (size), _Pre_cap_c_(size) _Post_valid_impl_) +#define _Out_opt_cap_c_(size) _SAL1_1_Source_(_Out_opt_cap_c_, (size), _Pre_opt_cap_c_(size) _Post_valid_impl_) +#define _Out_bytecap_c_(size) _SAL1_1_Source_(_Out_bytecap_c_, (size), _Pre_bytecap_c_(size) _Post_valid_impl_) +#define _Out_opt_bytecap_c_(size) _SAL1_1_Source_(_Out_opt_bytecap_c_, (size), _Pre_opt_bytecap_c_(size) _Post_valid_impl_) + +// buffer capacity is described by another parameter multiplied by a constant expression +#define _Out_cap_m_(mult,size) _SAL1_1_Source_(_Out_cap_m_, (mult,size), _Pre_cap_m_(mult,size) _Post_valid_impl_) +#define _Out_opt_cap_m_(mult,size) _SAL1_1_Source_(_Out_opt_cap_m_, (mult,size), _Pre_opt_cap_m_(mult,size) _Post_valid_impl_) +#define _Out_z_cap_m_(mult,size) _SAL1_1_Source_(_Out_z_cap_m_, (mult,size), _Pre_cap_m_(mult,size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_cap_m_(mult,size) _SAL1_1_Source_(_Out_opt_z_cap_m_, (mult,size), _Pre_opt_cap_m_(mult,size) _Post_valid_impl_ _Post_z_) + +// buffer capacity is described by another pointer +// e.g. 
void Foo( _Out_ptrdiff_cap_(pchMax) char* pch, const char* pchMax ) { while( pch < pchMax ) pch++; } +#define _Out_ptrdiff_cap_(size) _SAL1_1_Source_(_Out_ptrdiff_cap_, (size), _Pre_ptrdiff_cap_(size) _Post_valid_impl_) +#define _Out_opt_ptrdiff_cap_(size) _SAL1_1_Source_(_Out_opt_ptrdiff_cap_, (size), _Pre_opt_ptrdiff_cap_(size) _Post_valid_impl_) + +// buffer capacity is described by a complex expression +#define _Out_cap_x_(size) _SAL1_1_Source_(_Out_cap_x_, (size), _Pre_cap_x_(size) _Post_valid_impl_) +#define _Out_opt_cap_x_(size) _SAL1_1_Source_(_Out_opt_cap_x_, (size), _Pre_opt_cap_x_(size) _Post_valid_impl_) +#define _Out_bytecap_x_(size) _SAL1_1_Source_(_Out_bytecap_x_, (size), _Pre_bytecap_x_(size) _Post_valid_impl_) +#define _Out_opt_bytecap_x_(size) _SAL1_1_Source_(_Out_opt_bytecap_x_, (size), _Pre_opt_bytecap_x_(size) _Post_valid_impl_) + +// a zero terminated string is filled into a buffer of given capacity +// e.g. void CopyStr( _In_z_ const char* szFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo ); +// buffer capacity is described by another parameter +#define _Out_z_cap_(size) _SAL1_1_Source_(_Out_z_cap_, (size), _Pre_cap_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_cap_(size) _SAL1_1_Source_(_Out_opt_z_cap_, (size), _Pre_opt_cap_(size) _Post_valid_impl_ _Post_z_) +#define _Out_z_bytecap_(size) _SAL1_1_Source_(_Out_z_bytecap_, (size), _Pre_bytecap_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_bytecap_(size) _SAL1_1_Source_(_Out_opt_z_bytecap_, (size), _Pre_opt_bytecap_(size) _Post_valid_impl_ _Post_z_) + +// buffer capacity is described by a constant expression +#define _Out_z_cap_c_(size) _SAL1_1_Source_(_Out_z_cap_c_, (size), _Pre_cap_c_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_cap_c_(size) _SAL1_1_Source_(_Out_opt_z_cap_c_, (size), _Pre_opt_cap_c_(size) _Post_valid_impl_ _Post_z_) +#define _Out_z_bytecap_c_(size) _SAL1_1_Source_(_Out_z_bytecap_c_, (size), _Pre_bytecap_c_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_bytecap_c_(size) _SAL1_1_Source_(_Out_opt_z_bytecap_c_, (size), _Pre_opt_bytecap_c_(size) _Post_valid_impl_ _Post_z_) + +// buffer capacity is described by a complex expression +#define _Out_z_cap_x_(size) _SAL1_1_Source_(_Out_z_cap_x_, (size), _Pre_cap_x_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_cap_x_(size) _SAL1_1_Source_(_Out_opt_z_cap_x_, (size), _Pre_opt_cap_x_(size) _Post_valid_impl_ _Post_z_) +#define _Out_z_bytecap_x_(size) _SAL1_1_Source_(_Out_z_bytecap_x_, (size), _Pre_bytecap_x_(size) _Post_valid_impl_ _Post_z_) +#define _Out_opt_z_bytecap_x_(size) _SAL1_1_Source_(_Out_opt_z_bytecap_x_, (size), _Pre_opt_bytecap_x_(size) _Post_valid_impl_ _Post_z_) + +// a zero terminated string is filled into a buffer of given capacity
// e.g. 
size_t CopyCharRange( _In_count_(cchFrom) const char* rgchFrom, size_t cchFrom, _Out_cap_post_count_(cchTo,return)) char* rgchTo, size_t cchTo ); +#define _Out_cap_post_count_(cap,count) _SAL1_1_Source_(_Out_cap_post_count_, (cap,count), _Pre_cap_(cap) _Post_valid_impl_ _Post_count_(count)) +#define _Out_opt_cap_post_count_(cap,count) _SAL1_1_Source_(_Out_opt_cap_post_count_, (cap,count), _Pre_opt_cap_(cap) _Post_valid_impl_ _Post_count_(count)) +#define _Out_bytecap_post_bytecount_(cap,count) _SAL1_1_Source_(_Out_bytecap_post_bytecount_, (cap,count), _Pre_bytecap_(cap) _Post_valid_impl_ _Post_bytecount_(count)) +#define _Out_opt_bytecap_post_bytecount_(cap,count) _SAL1_1_Source_(_Out_opt_bytecap_post_bytecount_, (cap,count), _Pre_opt_bytecap_(cap) _Post_valid_impl_ _Post_bytecount_(count)) + +// a zero terminated string is filled into a buffer of given capacity +// e.g. size_t CopyStr( _In_z_ const char* szFrom, _Out_z_cap_post_count_(cchTo,return+1) char* szTo, size_t cchTo ); +#define _Out_z_cap_post_count_(cap,count) _SAL1_1_Source_(_Out_z_cap_post_count_, (cap,count), _Pre_cap_(cap) _Post_valid_impl_ _Post_z_count_(count)) +#define _Out_opt_z_cap_post_count_(cap,count) _SAL1_1_Source_(_Out_opt_z_cap_post_count_, (cap,count), _Pre_opt_cap_(cap) _Post_valid_impl_ _Post_z_count_(count)) +#define _Out_z_bytecap_post_bytecount_(cap,count) _SAL1_1_Source_(_Out_z_bytecap_post_bytecount_, (cap,count), _Pre_bytecap_(cap) _Post_valid_impl_ _Post_z_bytecount_(count)) +#define _Out_opt_z_bytecap_post_bytecount_(cap,count) _SAL1_1_Source_(_Out_opt_z_bytecap_post_bytecount_, (cap,count), _Pre_opt_bytecap_(cap) _Post_valid_impl_ _Post_z_bytecount_(count)) + +// only use with dereferenced arguments e.g. '*pcch' +#define _Out_capcount_(capcount) _SAL1_1_Source_(_Out_capcount_, (capcount), _Pre_cap_(capcount) _Post_valid_impl_ _Post_count_(capcount)) +#define _Out_opt_capcount_(capcount) _SAL1_1_Source_(_Out_opt_capcount_, (capcount), _Pre_opt_cap_(capcount) _Post_valid_impl_ _Post_count_(capcount)) +#define _Out_bytecapcount_(capcount) _SAL1_1_Source_(_Out_bytecapcount_, (capcount), _Pre_bytecap_(capcount) _Post_valid_impl_ _Post_bytecount_(capcount)) +#define _Out_opt_bytecapcount_(capcount) _SAL1_1_Source_(_Out_opt_bytecapcount_, (capcount), _Pre_opt_bytecap_(capcount) _Post_valid_impl_ _Post_bytecount_(capcount)) + +#define _Out_capcount_x_(capcount) _SAL1_1_Source_(_Out_capcount_x_, (capcount), _Pre_cap_x_(capcount) _Post_valid_impl_ _Post_count_x_(capcount)) +#define _Out_opt_capcount_x_(capcount) _SAL1_1_Source_(_Out_opt_capcount_x_, (capcount), _Pre_opt_cap_x_(capcount) _Post_valid_impl_ _Post_count_x_(capcount)) +#define _Out_bytecapcount_x_(capcount) _SAL1_1_Source_(_Out_bytecapcount_x_, (capcount), _Pre_bytecap_x_(capcount) _Post_valid_impl_ _Post_bytecount_x_(capcount)) +#define _Out_opt_bytecapcount_x_(capcount) _SAL1_1_Source_(_Out_opt_bytecapcount_x_, (capcount), _Pre_opt_bytecap_x_(capcount) _Post_valid_impl_ _Post_bytecount_x_(capcount)) + +// e.g. 
GetString( _Out_z_capcount_(*pLen+1) char* sz, size_t* pLen ); +#define _Out_z_capcount_(capcount) _SAL1_1_Source_(_Out_z_capcount_, (capcount), _Pre_cap_(capcount) _Post_valid_impl_ _Post_z_count_(capcount)) +#define _Out_opt_z_capcount_(capcount) _SAL1_1_Source_(_Out_opt_z_capcount_, (capcount), _Pre_opt_cap_(capcount) _Post_valid_impl_ _Post_z_count_(capcount)) +#define _Out_z_bytecapcount_(capcount) _SAL1_1_Source_(_Out_z_bytecapcount_, (capcount), _Pre_bytecap_(capcount) _Post_valid_impl_ _Post_z_bytecount_(capcount)) +#define _Out_opt_z_bytecapcount_(capcount) _SAL1_1_Source_(_Out_opt_z_bytecapcount_, (capcount), _Pre_opt_bytecap_(capcount) _Post_valid_impl_ _Post_z_bytecount_(capcount)) + + +// 'inout' buffers with initialized elements before and after the call +// e.g. void ModifyIndices( _Inout_count_(cIndices) int* rgIndeces, size_t cIndices ); +#define _Inout_count_(size) _SAL1_1_Source_(_Inout_count_, (size), _Prepost_count_(size)) +#define _Inout_opt_count_(size) _SAL1_1_Source_(_Inout_opt_count_, (size), _Prepost_opt_count_(size)) +#define _Inout_bytecount_(size) _SAL1_1_Source_(_Inout_bytecount_, (size), _Prepost_bytecount_(size)) +#define _Inout_opt_bytecount_(size) _SAL1_1_Source_(_Inout_opt_bytecount_, (size), _Prepost_opt_bytecount_(size)) + +#define _Inout_count_c_(size) _SAL1_1_Source_(_Inout_count_c_, (size), _Prepost_count_c_(size)) +#define _Inout_opt_count_c_(size) _SAL1_1_Source_(_Inout_opt_count_c_, (size), _Prepost_opt_count_c_(size)) +#define _Inout_bytecount_c_(size) _SAL1_1_Source_(_Inout_bytecount_c_, (size), _Prepost_bytecount_c_(size)) +#define _Inout_opt_bytecount_c_(size) _SAL1_1_Source_(_Inout_opt_bytecount_c_, (size), _Prepost_opt_bytecount_c_(size)) + +// nullterminated 'inout' buffers with initialized elements before and after the call +// e.g. 
void ModifyIndices( _Inout_count_(cIndices) int* rgIndeces, size_t cIndices ); +#define _Inout_z_count_(size) _SAL1_1_Source_(_Inout_z_count_, (size), _Prepost_z_ _Prepost_count_(size)) +#define _Inout_opt_z_count_(size) _SAL1_1_Source_(_Inout_opt_z_count_, (size), _Prepost_z_ _Prepost_opt_count_(size)) +#define _Inout_z_bytecount_(size) _SAL1_1_Source_(_Inout_z_bytecount_, (size), _Prepost_z_ _Prepost_bytecount_(size)) +#define _Inout_opt_z_bytecount_(size) _SAL1_1_Source_(_Inout_opt_z_bytecount_, (size), _Prepost_z_ _Prepost_opt_bytecount_(size)) + +#define _Inout_z_count_c_(size) _SAL1_1_Source_(_Inout_z_count_c_, (size), _Prepost_z_ _Prepost_count_c_(size)) +#define _Inout_opt_z_count_c_(size) _SAL1_1_Source_(_Inout_opt_z_count_c_, (size), _Prepost_z_ _Prepost_opt_count_c_(size)) +#define _Inout_z_bytecount_c_(size) _SAL1_1_Source_(_Inout_z_bytecount_c_, (size), _Prepost_z_ _Prepost_bytecount_c_(size)) +#define _Inout_opt_z_bytecount_c_(size) _SAL1_1_Source_(_Inout_opt_z_bytecount_c_, (size), _Prepost_z_ _Prepost_opt_bytecount_c_(size)) + +#define _Inout_ptrdiff_count_(size) _SAL1_1_Source_(_Inout_ptrdiff_count_, (size), _Pre_ptrdiff_count_(size)) +#define _Inout_opt_ptrdiff_count_(size) _SAL1_1_Source_(_Inout_opt_ptrdiff_count_, (size), _Pre_opt_ptrdiff_count_(size)) + +#define _Inout_count_x_(size) _SAL1_1_Source_(_Inout_count_x_, (size), _Prepost_count_x_(size)) +#define _Inout_opt_count_x_(size) _SAL1_1_Source_(_Inout_opt_count_x_, (size), _Prepost_opt_count_x_(size)) +#define _Inout_bytecount_x_(size) _SAL1_1_Source_(_Inout_bytecount_x_, (size), _Prepost_bytecount_x_(size)) +#define _Inout_opt_bytecount_x_(size) _SAL1_1_Source_(_Inout_opt_bytecount_x_, (size), _Prepost_opt_bytecount_x_(size)) + +// e.g. void AppendToLPSTR( _In_ LPCSTR szFrom, _Inout_cap_(cchTo) LPSTR* szTo, size_t cchTo ); +#define _Inout_cap_(size) _SAL1_1_Source_(_Inout_cap_, (size), _Pre_valid_cap_(size) _Post_valid_) +#define _Inout_opt_cap_(size) _SAL1_1_Source_(_Inout_opt_cap_, (size), _Pre_opt_valid_cap_(size) _Post_valid_) +#define _Inout_bytecap_(size) _SAL1_1_Source_(_Inout_bytecap_, (size), _Pre_valid_bytecap_(size) _Post_valid_) +#define _Inout_opt_bytecap_(size) _SAL1_1_Source_(_Inout_opt_bytecap_, (size), _Pre_opt_valid_bytecap_(size) _Post_valid_) + +#define _Inout_cap_c_(size) _SAL1_1_Source_(_Inout_cap_c_, (size), _Pre_valid_cap_c_(size) _Post_valid_) +#define _Inout_opt_cap_c_(size) _SAL1_1_Source_(_Inout_opt_cap_c_, (size), _Pre_opt_valid_cap_c_(size) _Post_valid_) +#define _Inout_bytecap_c_(size) _SAL1_1_Source_(_Inout_bytecap_c_, (size), _Pre_valid_bytecap_c_(size) _Post_valid_) +#define _Inout_opt_bytecap_c_(size) _SAL1_1_Source_(_Inout_opt_bytecap_c_, (size), _Pre_opt_valid_bytecap_c_(size) _Post_valid_) + +#define _Inout_cap_x_(size) _SAL1_1_Source_(_Inout_cap_x_, (size), _Pre_valid_cap_x_(size) _Post_valid_) +#define _Inout_opt_cap_x_(size) _SAL1_1_Source_(_Inout_opt_cap_x_, (size), _Pre_opt_valid_cap_x_(size) _Post_valid_) +#define _Inout_bytecap_x_(size) _SAL1_1_Source_(_Inout_bytecap_x_, (size), _Pre_valid_bytecap_x_(size) _Post_valid_) +#define _Inout_opt_bytecap_x_(size) _SAL1_1_Source_(_Inout_opt_bytecap_x_, (size), _Pre_opt_valid_bytecap_x_(size) _Post_valid_) + +// inout string buffers with writable size +// e.g. 
void AppendStr( _In_z_ const char* szFrom, _Inout_z_cap_(cchTo) char* szTo, size_t cchTo ); +#define _Inout_z_cap_(size) _SAL1_1_Source_(_Inout_z_cap_, (size), _Pre_z_cap_(size) _Post_z_) +#define _Inout_opt_z_cap_(size) _SAL1_1_Source_(_Inout_opt_z_cap_, (size), _Pre_opt_z_cap_(size) _Post_z_) +#define _Inout_z_bytecap_(size) _SAL1_1_Source_(_Inout_z_bytecap_, (size), _Pre_z_bytecap_(size) _Post_z_) +#define _Inout_opt_z_bytecap_(size) _SAL1_1_Source_(_Inout_opt_z_bytecap_, (size), _Pre_opt_z_bytecap_(size) _Post_z_) + +#define _Inout_z_cap_c_(size) _SAL1_1_Source_(_Inout_z_cap_c_, (size), _Pre_z_cap_c_(size) _Post_z_) +#define _Inout_opt_z_cap_c_(size) _SAL1_1_Source_(_Inout_opt_z_cap_c_, (size), _Pre_opt_z_cap_c_(size) _Post_z_) +#define _Inout_z_bytecap_c_(size) _SAL1_1_Source_(_Inout_z_bytecap_c_, (size), _Pre_z_bytecap_c_(size) _Post_z_) +#define _Inout_opt_z_bytecap_c_(size) _SAL1_1_Source_(_Inout_opt_z_bytecap_c_, (size), _Pre_opt_z_bytecap_c_(size) _Post_z_) + +#define _Inout_z_cap_x_(size) _SAL1_1_Source_(_Inout_z_cap_x_, (size), _Pre_z_cap_x_(size) _Post_z_) +#define _Inout_opt_z_cap_x_(size) _SAL1_1_Source_(_Inout_opt_z_cap_x_, (size), _Pre_opt_z_cap_x_(size) _Post_z_) +#define _Inout_z_bytecap_x_(size) _SAL1_1_Source_(_Inout_z_bytecap_x_, (size), _Pre_z_bytecap_x_(size) _Post_z_) +#define _Inout_opt_z_bytecap_x_(size) _SAL1_1_Source_(_Inout_opt_z_bytecap_x_, (size), _Pre_opt_z_bytecap_x_(size) _Post_z_) + + +// returning pointers to valid objects +#define _Ret_ _SAL1_1_Source_(_Ret_, (), _Ret_valid_) +#define _Ret_opt_ _SAL1_1_Source_(_Ret_opt_, (), _Ret_opt_valid_) + +// annotations to express 'boundedness' of integral value parameter +#define _In_bound_ _SAL1_1_Source_(_In_bound_, (), _In_bound_impl_) +#define _Out_bound_ _SAL1_1_Source_(_Out_bound_, (), _Out_bound_impl_) +#define _Ret_bound_ _SAL1_1_Source_(_Ret_bound_, (), _Ret_bound_impl_) +#define _Deref_in_bound_ _SAL1_1_Source_(_Deref_in_bound_, (), _Deref_in_bound_impl_) +#define _Deref_out_bound_ _SAL1_1_Source_(_Deref_out_bound_, (), _Deref_out_bound_impl_) +#define _Deref_inout_bound_ _SAL1_1_Source_(_Deref_inout_bound_, (), _Deref_in_bound_ _Deref_out_bound_) +#define _Deref_ret_bound_ _SAL1_1_Source_(_Deref_ret_bound_, (), _Deref_ret_bound_impl_) + +// e.g. HRESULT HrCreatePoint( _Deref_out_opt_ POINT** ppPT ); +#define _Deref_out_ _SAL1_1_Source_(_Deref_out_, (), _Out_ _Deref_post_valid_) +#define _Deref_out_opt_ _SAL1_1_Source_(_Deref_out_opt_, (), _Out_ _Deref_post_opt_valid_) +#define _Deref_opt_out_ _SAL1_1_Source_(_Deref_opt_out_, (), _Out_opt_ _Deref_post_valid_) +#define _Deref_opt_out_opt_ _SAL1_1_Source_(_Deref_opt_out_opt_, (), _Out_opt_ _Deref_post_opt_valid_) + +// e.g. void CloneString( _In_z_ const WCHAR* wzFrom, _Deref_out_z_ WCHAR** pWzTo ); +#define _Deref_out_z_ _SAL1_1_Source_(_Deref_out_z_, (), _Out_ _Deref_post_z_) +#define _Deref_out_opt_z_ _SAL1_1_Source_(_Deref_out_opt_z_, (), _Out_ _Deref_post_opt_z_) +#define _Deref_opt_out_z_ _SAL1_1_Source_(_Deref_opt_out_z_, (), _Out_opt_ _Deref_post_z_) +#define _Deref_opt_out_opt_z_ _SAL1_1_Source_(_Deref_opt_out_opt_z_, (), _Out_opt_ _Deref_post_opt_z_) + +// +// _Deref_pre_ --- +// +// describing conditions for array elements of dereferenced pointer parameters that must be met before the call + +// e.g. 
void SaveStringArray( _In_count_(cStrings) _Deref_pre_z_ const WCHAR* const rgpwch[] ); +#define _Deref_pre_z_ _SAL1_1_Source_(_Deref_pre_z_, (), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__zterm_impl) _Pre_valid_impl_) +#define _Deref_pre_opt_z_ _SAL1_1_Source_(_Deref_pre_opt_z_, (), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__zterm_impl) _Pre_valid_impl_) + +// e.g. void FillInArrayOfStr32( _In_count_(cStrings) _Deref_pre_cap_c_(32) _Deref_post_z_ WCHAR* const rgpwch[] ); +// buffer capacity is described by another parameter +#define _Deref_pre_cap_(size) _SAL1_1_Source_(_Deref_pre_cap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_impl(size))) +#define _Deref_pre_opt_cap_(size) _SAL1_1_Source_(_Deref_pre_opt_cap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_impl(size))) +#define _Deref_pre_bytecap_(size) _SAL1_1_Source_(_Deref_pre_bytecap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_impl(size))) +#define _Deref_pre_opt_bytecap_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_impl(size))) + +// buffer capacity is described by a constant expression +#define _Deref_pre_cap_c_(size) _SAL1_1_Source_(_Deref_pre_cap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_c_impl(size))) +#define _Deref_pre_opt_cap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_cap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_c_impl(size))) +#define _Deref_pre_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_bytecap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_c_impl(size))) +#define _Deref_pre_opt_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_c_impl(size))) + +// buffer capacity is described by a complex condition +#define _Deref_pre_cap_x_(size) _SAL1_1_Source_(_Deref_pre_cap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_x_impl(size))) +#define _Deref_pre_opt_cap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_cap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_x_impl(size))) +#define _Deref_pre_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_bytecap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_x_impl(size))) +#define _Deref_pre_opt_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_x_impl(size))) + +// convenience macros for nullterminated buffers with given capacity +#define _Deref_pre_z_cap_(size) _SAL1_1_Source_(_Deref_pre_z_cap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_cap_(size) _SAL1_1_Source_(_Deref_pre_opt_z_cap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_z_bytecap_(size) _SAL1_1_Source_(_Deref_pre_z_bytecap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_bytecap_(size) _SAL1_1_Source_(_Deref_pre_opt_z_bytecap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_impl(size)) _Pre_valid_impl_) + +#define 
_Deref_pre_z_cap_c_(size) _SAL1_1_Source_(_Deref_pre_z_cap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_cap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_z_cap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_z_bytecap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_z_bytecap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Pre_valid_impl_) + +#define _Deref_pre_z_cap_x_(size) _SAL1_1_Source_(_Deref_pre_z_cap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_cap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_z_cap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__cap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_z_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_z_bytecap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_z_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_z_bytecap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Pre_valid_impl_) + +// known capacity and valid but unknown readable extent +#define _Deref_pre_valid_cap_(size) _SAL1_1_Source_(_Deref_pre_valid_cap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_cap_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_cap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_valid_bytecap_(size) _SAL1_1_Source_(_Deref_pre_valid_bytecap_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_bytecap_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_bytecap_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_impl(size)) _Pre_valid_impl_) + +#define _Deref_pre_valid_cap_c_(size) _SAL1_1_Source_(_Deref_pre_valid_cap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_cap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_cap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_valid_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_valid_bytecap_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_bytecap_c_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_bytecap_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_c_impl(size)) _Pre_valid_impl_) + +#define _Deref_pre_valid_cap_x_(size) _SAL1_1_Source_(_Deref_pre_valid_cap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_cap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_cap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) 
_Deref_pre1_impl_(__cap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_valid_bytecap_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecap_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_pre_opt_valid_bytecap_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecap_x_impl(size)) _Pre_valid_impl_) + +// e.g. void SaveMatrix( _In_count_(n) _Deref_pre_count_(n) const Elem** matrix, size_t n ); +// valid buffer extent is described by another parameter +#define _Deref_pre_count_(size) _SAL1_1_Source_(_Deref_pre_count_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__count_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_count_(size) _SAL1_1_Source_(_Deref_pre_opt_count_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__count_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_bytecount_(size) _SAL1_1_Source_(_Deref_pre_bytecount_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_bytecount_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecount_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) + +// valid buffer extent is described by a constant expression +#define _Deref_pre_count_c_(size) _SAL1_1_Source_(_Deref_pre_count_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__count_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_count_c_(size) _SAL1_1_Source_(_Deref_pre_opt_count_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__count_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_bytecount_c_(size) _SAL1_1_Source_(_Deref_pre_bytecount_c_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecount_c_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_bytecount_c_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecount_c_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecount_c_impl(size)) _Pre_valid_impl_) + +// valid buffer extent is described by a complex expression +#define _Deref_pre_count_x_(size) _SAL1_1_Source_(_Deref_pre_count_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__count_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_count_x_(size) _SAL1_1_Source_(_Deref_pre_opt_count_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__count_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_bytecount_x_(size) _SAL1_1_Source_(_Deref_pre_bytecount_x_, (size), _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__bytecount_x_impl(size)) _Pre_valid_impl_) +#define _Deref_pre_opt_bytecount_x_(size) _SAL1_1_Source_(_Deref_pre_opt_bytecount_x_, (size), _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__bytecount_x_impl(size)) _Pre_valid_impl_) + +// e.g. 
void PrintStringArray( _In_count_(cElems) _Deref_pre_valid_ LPCSTR rgStr[], size_t cElems ); +#define _Deref_pre_valid_ _SAL1_1_Source_(_Deref_pre_valid_, (), _Deref_pre1_impl_(__notnull_impl_notref) _Pre_valid_impl_) +#define _Deref_pre_opt_valid_ _SAL1_1_Source_(_Deref_pre_opt_valid_, (), _Deref_pre1_impl_(__maybenull_impl_notref) _Pre_valid_impl_) +#define _Deref_pre_invalid_ _SAL1_1_Source_(_Deref_pre_invalid_, (), _Deref_pre1_impl_(__notvalid_impl)) + +#define _Deref_pre_notnull_ _SAL1_1_Source_(_Deref_pre_notnull_, (), _Deref_pre1_impl_(__notnull_impl_notref)) +#define _Deref_pre_maybenull_ _SAL1_1_Source_(_Deref_pre_maybenull_, (), _Deref_pre1_impl_(__maybenull_impl_notref)) +#define _Deref_pre_null_ _SAL1_1_Source_(_Deref_pre_null_, (), _Deref_pre1_impl_(__null_impl_notref)) + +// restrict access rights +#define _Deref_pre_readonly_ _SAL1_1_Source_(_Deref_pre_readonly_, (), _Deref_pre1_impl_(__readaccess_impl_notref)) +#define _Deref_pre_writeonly_ _SAL1_1_Source_(_Deref_pre_writeonly_, (), _Deref_pre1_impl_(__writeaccess_impl_notref)) + +// +// _Deref_post_ --- +// +// describing conditions for array elements or dereferenced pointer parameters that hold after the call + +// e.g. void CloneString( _In_z_ const Wchar_t* wzIn _Out_ _Deref_post_z_ WCHAR** pWzOut ); +#define _Deref_post_z_ _SAL1_1_Source_(_Deref_post_z_, (), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__zterm_impl) _Post_valid_impl_) +#define _Deref_post_opt_z_ _SAL1_1_Source_(_Deref_post_opt_z_, (), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__zterm_impl) _Post_valid_impl_) + +// e.g. HRESULT HrAllocateMemory( size_t cb, _Out_ _Deref_post_bytecap_(cb) void** ppv ); +// buffer capacity is described by another parameter +#define _Deref_post_cap_(size) _SAL1_1_Source_(_Deref_post_cap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_impl(size))) +#define _Deref_post_opt_cap_(size) _SAL1_1_Source_(_Deref_post_opt_cap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_impl(size))) +#define _Deref_post_bytecap_(size) _SAL1_1_Source_(_Deref_post_bytecap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_impl(size))) +#define _Deref_post_opt_bytecap_(size) _SAL1_1_Source_(_Deref_post_opt_bytecap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_impl(size))) + +// buffer capacity is described by a constant expression +#define _Deref_post_cap_c_(size) _SAL1_1_Source_(_Deref_post_cap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_c_impl(size))) +#define _Deref_post_opt_cap_c_(size) _SAL1_1_Source_(_Deref_post_opt_cap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_c_impl(size))) +#define _Deref_post_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_bytecap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_c_impl(size))) +#define _Deref_post_opt_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_opt_bytecap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_c_impl(size))) + +// buffer capacity is described by a complex expression +#define _Deref_post_cap_x_(size) _SAL1_1_Source_(_Deref_post_cap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_x_impl(size))) +#define _Deref_post_opt_cap_x_(size) _SAL1_1_Source_(_Deref_post_opt_cap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_x_impl(size))) 
+#define _Deref_post_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_bytecap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_x_impl(size))) +#define _Deref_post_opt_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_opt_bytecap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_x_impl(size))) + +// convenience macros for nullterminated buffers with given capacity +#define _Deref_post_z_cap_(size) _SAL1_1_Source_(_Deref_post_z_cap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_cap_(size) _SAL1_1_Source_(_Deref_post_opt_z_cap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_impl(size)) _Post_valid_impl_) +#define _Deref_post_z_bytecap_(size) _SAL1_1_Source_(_Deref_post_z_bytecap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_bytecap_(size) _SAL1_1_Source_(_Deref_post_opt_z_bytecap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_impl(size)) _Post_valid_impl_) + +#define _Deref_post_z_cap_c_(size) _SAL1_1_Source_(_Deref_post_z_cap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_cap_c_(size) _SAL1_1_Source_(_Deref_post_opt_z_cap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_z_bytecap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_opt_z_bytecap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Post_valid_impl_) + +#define _Deref_post_z_cap_x_(size) _SAL1_1_Source_(_Deref_post_z_cap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_cap_x_(size) _SAL1_1_Source_(_Deref_post_opt_z_cap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__cap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_z_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_z_bytecap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_z_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_opt_z_bytecap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Post_valid_impl_) + +// known capacity and valid but unknown readable extent +#define _Deref_post_valid_cap_(size) _SAL1_1_Source_(_Deref_post_valid_cap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_valid_cap_(size) _SAL1_1_Source_(_Deref_post_opt_valid_cap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_impl(size)) _Post_valid_impl_) +#define _Deref_post_valid_bytecap_(size) _SAL1_1_Source_(_Deref_post_valid_bytecap_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_impl(size)) _Post_valid_impl_) +#define 
_Deref_post_opt_valid_bytecap_(size) _SAL1_1_Source_(_Deref_post_opt_valid_bytecap_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_impl(size)) _Post_valid_impl_) + +#define _Deref_post_valid_cap_c_(size) _SAL1_1_Source_(_Deref_post_valid_cap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_valid_cap_c_(size) _SAL1_1_Source_(_Deref_post_opt_valid_cap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_valid_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_valid_bytecap_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_valid_bytecap_c_(size) _SAL1_1_Source_(_Deref_post_opt_valid_bytecap_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_c_impl(size)) _Post_valid_impl_) + +#define _Deref_post_valid_cap_x_(size) _SAL1_1_Source_(_Deref_post_valid_cap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_valid_cap_x_(size) _SAL1_1_Source_(_Deref_post_opt_valid_cap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__cap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_valid_bytecap_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecap_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_post_opt_valid_bytecap_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecap_x_impl(size)) _Post_valid_impl_) + +// e.g. 
HRESULT HrAllocateZeroInitializedMemory( size_t cb, _Out_ _Deref_post_bytecount_(cb) void** ppv ); +// valid buffer extent is described by another parameter +#define _Deref_post_count_(size) _SAL1_1_Source_(_Deref_post_count_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__count_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_count_(size) _SAL1_1_Source_(_Deref_post_opt_count_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__count_impl(size)) _Post_valid_impl_) +#define _Deref_post_bytecount_(size) _SAL1_1_Source_(_Deref_post_bytecount_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_bytecount_(size) _SAL1_1_Source_(_Deref_post_opt_bytecount_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) + +// buffer capacity is described by a constant expression +#define _Deref_post_count_c_(size) _SAL1_1_Source_(_Deref_post_count_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__count_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_count_c_(size) _SAL1_1_Source_(_Deref_post_opt_count_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__count_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_bytecount_c_(size) _SAL1_1_Source_(_Deref_post_bytecount_c_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecount_c_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_bytecount_c_(size) _SAL1_1_Source_(_Deref_post_opt_bytecount_c_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecount_c_impl(size)) _Post_valid_impl_) + +// buffer capacity is described by a complex expression +#define _Deref_post_count_x_(size) _SAL1_1_Source_(_Deref_post_count_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__count_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_count_x_(size) _SAL1_1_Source_(_Deref_post_opt_count_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__count_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_bytecount_x_(size) _SAL1_1_Source_(_Deref_post_bytecount_x_, (size), _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__bytecount_x_impl(size)) _Post_valid_impl_) +#define _Deref_post_opt_bytecount_x_(size) _SAL1_1_Source_(_Deref_post_opt_bytecount_x_, (size), _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__bytecount_x_impl(size)) _Post_valid_impl_) + +// e.g. 
void GetStrings( _Out_count_(cElems) _Deref_post_valid_ LPSTR const rgStr[], size_t cElems ); +#define _Deref_post_valid_ _SAL1_1_Source_(_Deref_post_valid_, (), _Deref_post1_impl_(__notnull_impl_notref) _Post_valid_impl_) +#define _Deref_post_opt_valid_ _SAL1_1_Source_(_Deref_post_opt_valid_, (), _Deref_post1_impl_(__maybenull_impl_notref) _Post_valid_impl_) + +#define _Deref_post_notnull_ _SAL1_1_Source_(_Deref_post_notnull_, (), _Deref_post1_impl_(__notnull_impl_notref)) +#define _Deref_post_maybenull_ _SAL1_1_Source_(_Deref_post_maybenull_, (), _Deref_post1_impl_(__maybenull_impl_notref)) +#define _Deref_post_null_ _SAL1_1_Source_(_Deref_post_null_, (), _Deref_post1_impl_(__null_impl_notref)) + +// +// _Deref_ret_ --- +// + +#define _Deref_ret_z_ _SAL1_1_Source_(_Deref_ret_z_, (), _Deref_ret1_impl_(__notnull_impl_notref) _Deref_ret1_impl_(__zterm_impl)) +#define _Deref_ret_opt_z_ _SAL1_1_Source_(_Deref_ret_opt_z_, (), _Deref_ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__zterm_impl)) + +// +// special _Deref_ --- +// +#define _Deref2_pre_readonly_ _SAL1_1_Source_(_Deref2_pre_readonly_, (), _Deref2_pre1_impl_(__readaccess_impl_notref)) + +// +// _Ret_ --- +// + +// e.g. _Ret_opt_valid_ LPSTR void* CloneSTR( _Pre_valid_ LPSTR src ); +#define _Ret_opt_valid_ _SAL1_1_Source_(_Ret_opt_valid_, (), _Ret1_impl_(__maybenull_impl_notref) _Ret_valid_impl_) +#define _Ret_opt_z_ _SAL1_1_Source_(_Ret_opt_z_, (), _Ret2_impl_(__maybenull_impl,__zterm_impl) _Ret_valid_impl_) + +// e.g. _Ret_opt_bytecap_(cb) void* AllocateMemory( size_t cb ); +// Buffer capacity is described by another parameter +#define _Ret_cap_(size) _SAL1_1_Source_(_Ret_cap_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__cap_impl(size))) +#define _Ret_opt_cap_(size) _SAL1_1_Source_(_Ret_opt_cap_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__cap_impl(size))) +#define _Ret_bytecap_(size) _SAL1_1_Source_(_Ret_bytecap_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecap_impl(size))) +#define _Ret_opt_bytecap_(size) _SAL1_1_Source_(_Ret_opt_bytecap_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecap_impl(size))) + +// Buffer capacity is described by a constant expression +#define _Ret_cap_c_(size) _SAL1_1_Source_(_Ret_cap_c_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__cap_c_impl(size))) +#define _Ret_opt_cap_c_(size) _SAL1_1_Source_(_Ret_opt_cap_c_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__cap_c_impl(size))) +#define _Ret_bytecap_c_(size) _SAL1_1_Source_(_Ret_bytecap_c_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecap_c_impl(size))) +#define _Ret_opt_bytecap_c_(size) _SAL1_1_Source_(_Ret_opt_bytecap_c_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecap_c_impl(size))) + +// Buffer capacity is described by a complex condition +#define _Ret_cap_x_(size) _SAL1_1_Source_(_Ret_cap_x_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__cap_x_impl(size))) +#define _Ret_opt_cap_x_(size) _SAL1_1_Source_(_Ret_opt_cap_x_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__cap_x_impl(size))) +#define _Ret_bytecap_x_(size) _SAL1_1_Source_(_Ret_bytecap_x_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecap_x_impl(size))) +#define _Ret_opt_bytecap_x_(size) _SAL1_1_Source_(_Ret_opt_bytecap_x_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecap_x_impl(size))) + +// return value is nullterminated and capacity is given by another parameter +#define _Ret_z_cap_(size) 
_SAL1_1_Source_(_Ret_z_cap_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret2_impl_(__zterm_impl,__cap_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_z_cap_(size) _SAL1_1_Source_(_Ret_opt_z_cap_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret2_impl_(__zterm_impl,__cap_impl(size)) _Ret_valid_impl_) +#define _Ret_z_bytecap_(size) _SAL1_1_Source_(_Ret_z_bytecap_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret2_impl_(__zterm_impl,__bytecap_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_z_bytecap_(size) _SAL1_1_Source_(_Ret_opt_z_bytecap_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret2_impl_(__zterm_impl,__bytecap_impl(size)) _Ret_valid_impl_) + +// e.g. _Ret_opt_bytecount_(cb) void* AllocateZeroInitializedMemory( size_t cb ); +// Valid Buffer extent is described by another parameter +#define _Ret_count_(size) _SAL1_1_Source_(_Ret_count_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__count_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_count_(size) _SAL1_1_Source_(_Ret_opt_count_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__count_impl(size)) _Ret_valid_impl_) +#define _Ret_bytecount_(size) _SAL1_1_Source_(_Ret_bytecount_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecount_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_bytecount_(size) _SAL1_1_Source_(_Ret_opt_bytecount_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecount_impl(size)) _Ret_valid_impl_) + +// Valid Buffer extent is described by a constant expression +#define _Ret_count_c_(size) _SAL1_1_Source_(_Ret_count_c_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__count_c_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_count_c_(size) _SAL1_1_Source_(_Ret_opt_count_c_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__count_c_impl(size)) _Ret_valid_impl_) +#define _Ret_bytecount_c_(size) _SAL1_1_Source_(_Ret_bytecount_c_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecount_c_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_bytecount_c_(size) _SAL1_1_Source_(_Ret_opt_bytecount_c_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecount_c_impl(size)) _Ret_valid_impl_) + +// Valid Buffer extent is described by a complex expression +#define _Ret_count_x_(size) _SAL1_1_Source_(_Ret_count_x_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__count_x_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_count_x_(size) _SAL1_1_Source_(_Ret_opt_count_x_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__count_x_impl(size)) _Ret_valid_impl_) +#define _Ret_bytecount_x_(size) _SAL1_1_Source_(_Ret_bytecount_x_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecount_x_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_bytecount_x_(size) _SAL1_1_Source_(_Ret_opt_bytecount_x_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecount_x_impl(size)) _Ret_valid_impl_) + +// return value is nullterminated and length is given by another parameter +#define _Ret_z_count_(size) _SAL1_1_Source_(_Ret_z_count_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret2_impl_(__zterm_impl,__count_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_z_count_(size) _SAL1_1_Source_(_Ret_opt_z_count_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret2_impl_(__zterm_impl,__count_impl(size)) _Ret_valid_impl_) +#define _Ret_z_bytecount_(size) _SAL1_1_Source_(_Ret_z_bytecount_, (size), _Ret1_impl_(__notnull_impl_notref) _Ret2_impl_(__zterm_impl,__bytecount_impl(size)) _Ret_valid_impl_) +#define _Ret_opt_z_bytecount_(size) 
_SAL1_1_Source_(_Ret_opt_z_bytecount_, (size), _Ret1_impl_(__maybenull_impl_notref) _Ret2_impl_(__zterm_impl,__bytecount_impl(size)) _Ret_valid_impl_) + + +// _Pre_ annotations --- +#define _Pre_opt_z_ _SAL1_1_Source_(_Pre_opt_z_, (), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__zterm_impl) _Pre_valid_impl_) + +// restrict access rights +#define _Pre_readonly_ _SAL1_1_Source_(_Pre_readonly_, (), _Pre1_impl_(__readaccess_impl_notref)) +#define _Pre_writeonly_ _SAL1_1_Source_(_Pre_writeonly_, (), _Pre1_impl_(__writeaccess_impl_notref)) + +// e.g. void FreeMemory( _Pre_bytecap_(cb) _Post_ptr_invalid_ void* pv, size_t cb ); +// buffer capacity described by another parameter +#define _Pre_cap_(size) _SAL1_1_Source_(_Pre_cap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_impl(size))) +#define _Pre_opt_cap_(size) _SAL1_1_Source_(_Pre_opt_cap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_impl(size))) +#define _Pre_bytecap_(size) _SAL1_1_Source_(_Pre_bytecap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_impl(size))) +#define _Pre_opt_bytecap_(size) _SAL1_1_Source_(_Pre_opt_bytecap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_impl(size))) + +// buffer capacity described by a constant expression +#define _Pre_cap_c_(size) _SAL1_1_Source_(_Pre_cap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_impl(size))) +#define _Pre_opt_cap_c_(size) _SAL1_1_Source_(_Pre_opt_cap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_impl(size))) +#define _Pre_bytecap_c_(size) _SAL1_1_Source_(_Pre_bytecap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size))) +#define _Pre_opt_bytecap_c_(size) _SAL1_1_Source_(_Pre_opt_bytecap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size))) +#define _Pre_cap_c_one_ _SAL1_1_Source_(_Pre_cap_c_one_, (), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl)) +#define _Pre_opt_cap_c_one_ _SAL1_1_Source_(_Pre_opt_cap_c_one_, (), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl)) + +// buffer capacity is described by another parameter multiplied by a constant expression +#define _Pre_cap_m_(mult,size) _SAL1_1_Source_(_Pre_cap_m_, (mult,size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__mult_impl(mult,size))) +#define _Pre_opt_cap_m_(mult,size) _SAL1_1_Source_(_Pre_opt_cap_m_, (mult,size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__mult_impl(mult,size))) + +// buffer capacity described by size of other buffer, only used by dangerous legacy APIs +// e.g. 
int strcpy(_Pre_cap_for_(src) char* dst, const char* src); +#define _Pre_cap_for_(param) _SAL1_1_Source_(_Pre_cap_for_, (param), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_for_impl(param))) +#define _Pre_opt_cap_for_(param) _SAL1_1_Source_(_Pre_opt_cap_for_, (param), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_for_impl(param))) + +// buffer capacity described by a complex condition +#define _Pre_cap_x_(size) _SAL1_1_Source_(_Pre_cap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_x_impl(size))) +#define _Pre_opt_cap_x_(size) _SAL1_1_Source_(_Pre_opt_cap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_x_impl(size))) +#define _Pre_bytecap_x_(size) _SAL1_1_Source_(_Pre_bytecap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size))) +#define _Pre_opt_bytecap_x_(size) _SAL1_1_Source_(_Pre_opt_bytecap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size))) + +// buffer capacity described by the difference to another pointer parameter +#define _Pre_ptrdiff_cap_(ptr) _SAL1_1_Source_(_Pre_ptrdiff_cap_, (ptr), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_x_impl(__ptrdiff(ptr)))) +#define _Pre_opt_ptrdiff_cap_(ptr) _SAL1_1_Source_(_Pre_opt_ptrdiff_cap_, (ptr), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_x_impl(__ptrdiff(ptr)))) + +// e.g. void AppendStr( _Pre_z_ const char* szFrom, _Pre_z_cap_(cchTo) _Post_z_ char* szTo, size_t cchTo ); +#define _Pre_z_cap_(size) _SAL1_1_Source_(_Pre_z_cap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_cap_(size) _SAL1_1_Source_(_Pre_opt_z_cap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_impl(size)) _Pre_valid_impl_) +#define _Pre_z_bytecap_(size) _SAL1_1_Source_(_Pre_z_bytecap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__bytecap_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_bytecap_(size) _SAL1_1_Source_(_Pre_opt_z_bytecap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre2_impl_(__zterm_impl,__bytecap_impl(size)) _Pre_valid_impl_) + +#define _Pre_z_cap_c_(size) _SAL1_1_Source_(_Pre_z_cap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_cap_c_(size) _SAL1_1_Source_(_Pre_opt_z_cap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_z_bytecap_c_(size) _SAL1_1_Source_(_Pre_z_bytecap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_bytecap_c_(size) _SAL1_1_Source_(_Pre_opt_z_bytecap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre2_impl_(__zterm_impl,__bytecap_c_impl(size)) _Pre_valid_impl_) + +#define _Pre_z_cap_x_(size) _SAL1_1_Source_(_Pre_z_cap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_cap_x_(size) _SAL1_1_Source_(_Pre_opt_z_cap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre2_impl_(__zterm_impl,__cap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_z_bytecap_x_(size) _SAL1_1_Source_(_Pre_z_bytecap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_z_bytecap_x_(size) _SAL1_1_Source_(_Pre_opt_z_bytecap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) 
_Pre2_impl_(__zterm_impl,__bytecap_x_impl(size)) _Pre_valid_impl_) + +// known capacity and valid but unknown readable extent +#define _Pre_valid_cap_(size) _SAL1_1_Source_(_Pre_valid_cap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_cap_(size) _SAL1_1_Source_(_Pre_opt_valid_cap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_impl(size)) _Pre_valid_impl_) +#define _Pre_valid_bytecap_(size) _SAL1_1_Source_(_Pre_valid_bytecap_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_bytecap_(size) _SAL1_1_Source_(_Pre_opt_valid_bytecap_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_impl(size)) _Pre_valid_impl_) + +#define _Pre_valid_cap_c_(size) _SAL1_1_Source_(_Pre_valid_cap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_cap_c_(size) _SAL1_1_Source_(_Pre_opt_valid_cap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_valid_bytecap_c_(size) _SAL1_1_Source_(_Pre_valid_bytecap_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_bytecap_c_(size) _SAL1_1_Source_(_Pre_opt_valid_bytecap_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size)) _Pre_valid_impl_) + +#define _Pre_valid_cap_x_(size) _SAL1_1_Source_(_Pre_valid_cap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_cap_x_(size) _SAL1_1_Source_(_Pre_opt_valid_cap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_valid_bytecap_x_(size) _SAL1_1_Source_(_Pre_valid_bytecap_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_valid_bytecap_x_(size) _SAL1_1_Source_(_Pre_opt_valid_bytecap_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size)) _Pre_valid_impl_) + +// e.g. 
void AppendCharRange( _Pre_count_(cchFrom) const char* rgFrom, size_t cchFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo ); +// Valid buffer extent described by another parameter +#define _Pre_count_(size) _SAL1_1_Source_(_Pre_count_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_count_(size) _SAL1_1_Source_(_Pre_opt_count_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_impl(size)) _Pre_valid_impl_) +#define _Pre_bytecount_(size) _SAL1_1_Source_(_Pre_bytecount_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_bytecount_(size) _SAL1_1_Source_(_Pre_opt_bytecount_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) + +// Valid buffer extent described by a constant expression +#define _Pre_count_c_(size) _SAL1_1_Source_(_Pre_count_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_count_c_(size) _SAL1_1_Source_(_Pre_opt_count_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_c_impl(size)) _Pre_valid_impl_) +#define _Pre_bytecount_c_(size) _SAL1_1_Source_(_Pre_bytecount_c_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecount_c_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_bytecount_c_(size) _SAL1_1_Source_(_Pre_opt_bytecount_c_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecount_c_impl(size)) _Pre_valid_impl_) + +// Valid buffer extent described by a complex expression +#define _Pre_count_x_(size) _SAL1_1_Source_(_Pre_count_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_count_x_(size) _SAL1_1_Source_(_Pre_opt_count_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_x_impl(size)) _Pre_valid_impl_) +#define _Pre_bytecount_x_(size) _SAL1_1_Source_(_Pre_bytecount_x_, (size), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecount_x_impl(size)) _Pre_valid_impl_) +#define _Pre_opt_bytecount_x_(size) _SAL1_1_Source_(_Pre_opt_bytecount_x_, (size), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecount_x_impl(size)) _Pre_valid_impl_) + +// Valid buffer extent described by the difference to another pointer parameter +#define _Pre_ptrdiff_count_(ptr) _SAL1_1_Source_(_Pre_ptrdiff_count_, (ptr), _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_x_impl(__ptrdiff(ptr))) _Pre_valid_impl_) +#define _Pre_opt_ptrdiff_count_(ptr) _SAL1_1_Source_(_Pre_opt_ptrdiff_count_, (ptr), _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_x_impl(__ptrdiff(ptr))) _Pre_valid_impl_) + + +// char * strncpy(_Out_cap_(_Count) _Post_maybez_ char * _Dest, _In_z_ const char * _Source, _In_ size_t _Count) +// buffer maybe zero-terminated after the call +#define _Post_maybez_ _SAL1_1_Source_(_Post_maybez_, (), _Post1_impl_(__maybezterm_impl)) + +// e.g. SIZE_T HeapSize( _In_ HANDLE hHeap, DWORD dwFlags, _Pre_notnull_ _Post_bytecap_(return) LPCVOID lpMem ); +#define _Post_cap_(size) _SAL1_1_Source_(_Post_cap_, (size), _Post1_impl_(__cap_impl(size))) +#define _Post_bytecap_(size) _SAL1_1_Source_(_Post_bytecap_, (size), _Post1_impl_(__bytecap_impl(size))) + +// e.g. 
int strlen( _In_z_ _Post_count_(return+1) const char* sz ); +#define _Post_count_(size) _SAL1_1_Source_(_Post_count_, (size), _Post1_impl_(__count_impl(size)) _Post_valid_impl_) +#define _Post_bytecount_(size) _SAL1_1_Source_(_Post_bytecount_, (size), _Post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) +#define _Post_count_c_(size) _SAL1_1_Source_(_Post_count_c_, (size), _Post1_impl_(__count_c_impl(size)) _Post_valid_impl_) +#define _Post_bytecount_c_(size) _SAL1_1_Source_(_Post_bytecount_c_, (size), _Post1_impl_(__bytecount_c_impl(size)) _Post_valid_impl_) +#define _Post_count_x_(size) _SAL1_1_Source_(_Post_count_x_, (size), _Post1_impl_(__count_x_impl(size)) _Post_valid_impl_) +#define _Post_bytecount_x_(size) _SAL1_1_Source_(_Post_bytecount_x_, (size), _Post1_impl_(__bytecount_x_impl(size)) _Post_valid_impl_) + +// e.g. size_t CopyStr( _In_z_ const char* szFrom, _Pre_cap_(cch) _Post_z_count_(return+1) char* szFrom, size_t cchFrom ); +#define _Post_z_count_(size) _SAL1_1_Source_(_Post_z_count_, (size), _Post2_impl_(__zterm_impl,__count_impl(size)) _Post_valid_impl_) +#define _Post_z_bytecount_(size) _SAL1_1_Source_(_Post_z_bytecount_, (size), _Post2_impl_(__zterm_impl,__bytecount_impl(size)) _Post_valid_impl_) +#define _Post_z_count_c_(size) _SAL1_1_Source_(_Post_z_count_c_, (size), _Post2_impl_(__zterm_impl,__count_c_impl(size)) _Post_valid_impl_) +#define _Post_z_bytecount_c_(size) _SAL1_1_Source_(_Post_z_bytecount_c_, (size), _Post2_impl_(__zterm_impl,__bytecount_c_impl(size)) _Post_valid_impl_) +#define _Post_z_count_x_(size) _SAL1_1_Source_(_Post_z_count_x_, (size), _Post2_impl_(__zterm_impl,__count_x_impl(size)) _Post_valid_impl_) +#define _Post_z_bytecount_x_(size) _SAL1_1_Source_(_Post_z_bytecount_x_, (size), _Post2_impl_(__zterm_impl,__bytecount_x_impl(size)) _Post_valid_impl_) + +// +// _Prepost_ --- +// +// describing conditions that hold before and after the function call + +#define _Prepost_opt_z_ _SAL1_1_Source_(_Prepost_opt_z_, (), _Pre_opt_z_ _Post_z_) + +#define _Prepost_count_(size) _SAL1_1_Source_(_Prepost_count_, (size), _Pre_count_(size) _Post_count_(size)) +#define _Prepost_opt_count_(size) _SAL1_1_Source_(_Prepost_opt_count_, (size), _Pre_opt_count_(size) _Post_count_(size)) +#define _Prepost_bytecount_(size) _SAL1_1_Source_(_Prepost_bytecount_, (size), _Pre_bytecount_(size) _Post_bytecount_(size)) +#define _Prepost_opt_bytecount_(size) _SAL1_1_Source_(_Prepost_opt_bytecount_, (size), _Pre_opt_bytecount_(size) _Post_bytecount_(size)) +#define _Prepost_count_c_(size) _SAL1_1_Source_(_Prepost_count_c_, (size), _Pre_count_c_(size) _Post_count_c_(size)) +#define _Prepost_opt_count_c_(size) _SAL1_1_Source_(_Prepost_opt_count_c_, (size), _Pre_opt_count_c_(size) _Post_count_c_(size)) +#define _Prepost_bytecount_c_(size) _SAL1_1_Source_(_Prepost_bytecount_c_, (size), _Pre_bytecount_c_(size) _Post_bytecount_c_(size)) +#define _Prepost_opt_bytecount_c_(size) _SAL1_1_Source_(_Prepost_opt_bytecount_c_, (size), _Pre_opt_bytecount_c_(size) _Post_bytecount_c_(size)) +#define _Prepost_count_x_(size) _SAL1_1_Source_(_Prepost_count_x_, (size), _Pre_count_x_(size) _Post_count_x_(size)) +#define _Prepost_opt_count_x_(size) _SAL1_1_Source_(_Prepost_opt_count_x_, (size), _Pre_opt_count_x_(size) _Post_count_x_(size)) +#define _Prepost_bytecount_x_(size) _SAL1_1_Source_(_Prepost_bytecount_x_, (size), _Pre_bytecount_x_(size) _Post_bytecount_x_(size)) +#define _Prepost_opt_bytecount_x_(size) _SAL1_1_Source_(_Prepost_opt_bytecount_x_, (size), _Pre_opt_bytecount_x_(size) 
_Post_bytecount_x_(size)) + +#define _Prepost_valid_ _SAL1_1_Source_(_Prepost_valid_, (), _Pre_valid_ _Post_valid_) +#define _Prepost_opt_valid_ _SAL1_1_Source_(_Prepost_opt_valid_, (), _Pre_opt_valid_ _Post_valid_) + +// +// _Deref_ --- +// +// short version for _Deref_pre_ _Deref_post_ +// describing conditions for array elements or dereferenced pointer parameters that hold before and after the call + +#define _Deref_prepost_z_ _SAL1_1_Source_(_Deref_prepost_z_, (), _Deref_pre_z_ _Deref_post_z_) +#define _Deref_prepost_opt_z_ _SAL1_1_Source_(_Deref_prepost_opt_z_, (), _Deref_pre_opt_z_ _Deref_post_opt_z_) + +#define _Deref_prepost_cap_(size) _SAL1_1_Source_(_Deref_prepost_cap_, (size), _Deref_pre_cap_(size) _Deref_post_cap_(size)) +#define _Deref_prepost_opt_cap_(size) _SAL1_1_Source_(_Deref_prepost_opt_cap_, (size), _Deref_pre_opt_cap_(size) _Deref_post_opt_cap_(size)) +#define _Deref_prepost_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_bytecap_, (size), _Deref_pre_bytecap_(size) _Deref_post_bytecap_(size)) +#define _Deref_prepost_opt_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_opt_bytecap_, (size), _Deref_pre_opt_bytecap_(size) _Deref_post_opt_bytecap_(size)) + +#define _Deref_prepost_cap_x_(size) _SAL1_1_Source_(_Deref_prepost_cap_x_, (size), _Deref_pre_cap_x_(size) _Deref_post_cap_x_(size)) +#define _Deref_prepost_opt_cap_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_cap_x_, (size), _Deref_pre_opt_cap_x_(size) _Deref_post_opt_cap_x_(size)) +#define _Deref_prepost_bytecap_x_(size) _SAL1_1_Source_(_Deref_prepost_bytecap_x_, (size), _Deref_pre_bytecap_x_(size) _Deref_post_bytecap_x_(size)) +#define _Deref_prepost_opt_bytecap_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_bytecap_x_, (size), _Deref_pre_opt_bytecap_x_(size) _Deref_post_opt_bytecap_x_(size)) + +#define _Deref_prepost_z_cap_(size) _SAL1_1_Source_(_Deref_prepost_z_cap_, (size), _Deref_pre_z_cap_(size) _Deref_post_z_cap_(size)) +#define _Deref_prepost_opt_z_cap_(size) _SAL1_1_Source_(_Deref_prepost_opt_z_cap_, (size), _Deref_pre_opt_z_cap_(size) _Deref_post_opt_z_cap_(size)) +#define _Deref_prepost_z_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_z_bytecap_, (size), _Deref_pre_z_bytecap_(size) _Deref_post_z_bytecap_(size)) +#define _Deref_prepost_opt_z_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_opt_z_bytecap_, (size), _Deref_pre_opt_z_bytecap_(size) _Deref_post_opt_z_bytecap_(size)) + +#define _Deref_prepost_valid_cap_(size) _SAL1_1_Source_(_Deref_prepost_valid_cap_, (size), _Deref_pre_valid_cap_(size) _Deref_post_valid_cap_(size)) +#define _Deref_prepost_opt_valid_cap_(size) _SAL1_1_Source_(_Deref_prepost_opt_valid_cap_, (size), _Deref_pre_opt_valid_cap_(size) _Deref_post_opt_valid_cap_(size)) +#define _Deref_prepost_valid_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_valid_bytecap_, (size), _Deref_pre_valid_bytecap_(size) _Deref_post_valid_bytecap_(size)) +#define _Deref_prepost_opt_valid_bytecap_(size) _SAL1_1_Source_(_Deref_prepost_opt_valid_bytecap_, (size), _Deref_pre_opt_valid_bytecap_(size) _Deref_post_opt_valid_bytecap_(size)) + +#define _Deref_prepost_valid_cap_x_(size) _SAL1_1_Source_(_Deref_prepost_valid_cap_x_, (size), _Deref_pre_valid_cap_x_(size) _Deref_post_valid_cap_x_(size)) +#define _Deref_prepost_opt_valid_cap_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_valid_cap_x_, (size), _Deref_pre_opt_valid_cap_x_(size) _Deref_post_opt_valid_cap_x_(size)) +#define _Deref_prepost_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_prepost_valid_bytecap_x_, (size), _Deref_pre_valid_bytecap_x_(size) 
_Deref_post_valid_bytecap_x_(size)) +#define _Deref_prepost_opt_valid_bytecap_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_valid_bytecap_x_, (size), _Deref_pre_opt_valid_bytecap_x_(size) _Deref_post_opt_valid_bytecap_x_(size)) + +#define _Deref_prepost_count_(size) _SAL1_1_Source_(_Deref_prepost_count_, (size), _Deref_pre_count_(size) _Deref_post_count_(size)) +#define _Deref_prepost_opt_count_(size) _SAL1_1_Source_(_Deref_prepost_opt_count_, (size), _Deref_pre_opt_count_(size) _Deref_post_opt_count_(size)) +#define _Deref_prepost_bytecount_(size) _SAL1_1_Source_(_Deref_prepost_bytecount_, (size), _Deref_pre_bytecount_(size) _Deref_post_bytecount_(size)) +#define _Deref_prepost_opt_bytecount_(size) _SAL1_1_Source_(_Deref_prepost_opt_bytecount_, (size), _Deref_pre_opt_bytecount_(size) _Deref_post_opt_bytecount_(size)) + +#define _Deref_prepost_count_x_(size) _SAL1_1_Source_(_Deref_prepost_count_x_, (size), _Deref_pre_count_x_(size) _Deref_post_count_x_(size)) +#define _Deref_prepost_opt_count_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_count_x_, (size), _Deref_pre_opt_count_x_(size) _Deref_post_opt_count_x_(size)) +#define _Deref_prepost_bytecount_x_(size) _SAL1_1_Source_(_Deref_prepost_bytecount_x_, (size), _Deref_pre_bytecount_x_(size) _Deref_post_bytecount_x_(size)) +#define _Deref_prepost_opt_bytecount_x_(size) _SAL1_1_Source_(_Deref_prepost_opt_bytecount_x_, (size), _Deref_pre_opt_bytecount_x_(size) _Deref_post_opt_bytecount_x_(size)) + +#define _Deref_prepost_valid_ _SAL1_1_Source_(_Deref_prepost_valid_, (), _Deref_pre_valid_ _Deref_post_valid_) +#define _Deref_prepost_opt_valid_ _SAL1_1_Source_(_Deref_prepost_opt_valid_, (), _Deref_pre_opt_valid_ _Deref_post_opt_valid_) + +// +// _Deref_ +// +// used with references to arrays + +#define _Deref_out_z_cap_c_(size) _SAL1_1_Source_(_Deref_out_z_cap_c_, (size), _Deref_pre_cap_c_(size) _Deref_post_z_) +#define _Deref_inout_z_cap_c_(size) _SAL1_1_Source_(_Deref_inout_z_cap_c_, (size), _Deref_pre_z_cap_c_(size) _Deref_post_z_) +#define _Deref_out_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_out_z_bytecap_c_, (size), _Deref_pre_bytecap_c_(size) _Deref_post_z_) +#define _Deref_inout_z_bytecap_c_(size) _SAL1_1_Source_(_Deref_inout_z_bytecap_c_, (size), _Deref_pre_z_bytecap_c_(size) _Deref_post_z_) +#define _Deref_inout_z_ _SAL1_1_Source_(_Deref_inout_z_, (), _Deref_prepost_z_) + +// #pragma endregion Input Buffer SAL 1 compatibility macros + + +//============================================================================ +// Implementation Layer: +//============================================================================ + + +// Naming conventions: +// A symbol the begins with _SA_ is for the machinery of creating any +// annotations; many of those come from sourceannotations.h in the case +// of attributes. + +// A symbol that ends with _impl is the very lowest level macro. It is +// not required to be a legal standalone annotation, and in the case +// of attribute annotations, usually is not. (In the case of some declspec +// annotations, it might be, but it should not be assumed so.) Those +// symols will be used in the _PreN..., _PostN... and _RetN... annotations +// to build up more complete annotations. + +// A symbol ending in _impl_ is reserved to the implementation as well, +// but it does form a complete annotation; usually they are used to build +// up even higher level annotations. 
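+// Before the implementation layer below, an illustrative sketch (names are
+// placeholders for illustration only) of how the high-level annotations
+// defined above are meant to be combined on a declaration:
+//
+// e.g. _Ret_opt_z_cap_(cchBuf)
+//      char* FormatGreeting( _Pre_z_cap_(cchBuf) _Post_z_ char* szBuf, size_t cchBuf );
+//
+// _Pre_z_cap_(cchBuf) states that szBuf is NULL-terminated on entry with
+// cchBuf writable elements, _Post_z_ that it is NULL-terminated on exit, and
+// _Ret_opt_z_cap_(cchBuf) that the possibly-NULL return value is
+// NULL-terminated with a capacity of cchBuf elements.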
+ + +#if _USE_ATTRIBUTES_FOR_SAL || _USE_DECLSPECS_FOR_SAL // [ +// Sharable "_impl" macros: these can be shared between the various annotation +// forms but are part of the implementation of the macros. These are collected +// here to assure that only necessary differences in the annotations +// exist. + +#define _Always_impl_(annos) _Group_(annos _SAL_nop_impl_) _On_failure_impl_(annos _SAL_nop_impl_) +#define _Bound_impl_ _SA_annotes0(SAL_bound) +#define _Field_range_impl_(min,max) _Range_impl_(min,max) +#define _Literal_impl_ _SA_annotes1(SAL_constant, __yes) +#define _Maybenull_impl_ _SA_annotes1(SAL_null, __maybe) +#define _Maybevalid_impl_ _SA_annotes1(SAL_valid, __maybe) +#define _Must_inspect_impl_ _Post_impl_ _SA_annotes0(SAL_mustInspect) +#define _Notliteral_impl_ _SA_annotes1(SAL_constant, __no) +#define _Notnull_impl_ _SA_annotes1(SAL_null, __no) +#define _Notvalid_impl_ _SA_annotes1(SAL_valid, __no) +#define _NullNull_terminated_impl_ _Group_(_SA_annotes1(SAL_nullTerminated, __yes) _SA_annotes1(SAL_readableTo,inexpressibleCount("NullNull terminated string"))) +#define _Null_impl_ _SA_annotes1(SAL_null, __yes) +#define _Null_terminated_impl_ _SA_annotes1(SAL_nullTerminated, __yes) +#define _Out_impl_ _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl) _Post_valid_impl_ +#define _Out_opt_impl_ _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl) _Post_valid_impl_ +#define _Points_to_data_impl_ _At_(*_Curr_, _SA_annotes1(SAL_mayBePointer, __no)) +#define _Post_satisfies_impl_(cond) _Post_impl_ _Satisfies_impl_(cond) +#define _Post_valid_impl_ _Post1_impl_(__valid_impl) +#define _Pre_satisfies_impl_(cond) _Pre_impl_ _Satisfies_impl_(cond) +#define _Pre_valid_impl_ _Pre1_impl_(__valid_impl) +#define _Range_impl_(min,max) _SA_annotes2(SAL_range, min, max) +#define _Readable_bytes_impl_(size) _SA_annotes1(SAL_readableTo, byteCount(size)) +#define _Readable_elements_impl_(size) _SA_annotes1(SAL_readableTo, elementCount(size)) +#define _Ret_valid_impl_ _Ret1_impl_(__valid_impl) +#define _Satisfies_impl_(cond) _SA_annotes1(SAL_satisfies, cond) +#define _Valid_impl_ _SA_annotes1(SAL_valid, __yes) +#define _Writable_bytes_impl_(size) _SA_annotes1(SAL_writableTo, byteCount(size)) +#define _Writable_elements_impl_(size) _SA_annotes1(SAL_writableTo, elementCount(size)) + +#define _In_range_impl_(min,max) _Pre_impl_ _Range_impl_(min,max) +#define _Out_range_impl_(min,max) _Post_impl_ _Range_impl_(min,max) +#define _Ret_range_impl_(min,max) _Post_impl_ _Range_impl_(min,max) +#define _Deref_in_range_impl_(min,max) _Deref_pre_impl_ _Range_impl_(min,max) +#define _Deref_out_range_impl_(min,max) _Deref_post_impl_ _Range_impl_(min,max) +#define _Deref_ret_range_impl_(min,max) _Deref_post_impl_ _Range_impl_(min,max) + +#define _Deref_pre_impl_ _Pre_impl_ _Notref_impl_ _Deref_impl_ +#define _Deref_post_impl_ _Post_impl_ _Notref_impl_ _Deref_impl_ + +// The following are for the implementation machinery, and are not +// suitable for annotating general code. +// We're tying to phase this out, someday. The parser quotes the param. +#define __AuToQuOtE _SA_annotes0(SAL_AuToQuOtE) + +// Normally the parser does some simple type checking of annotation params, +// defer that check to the plugin. 
+#define __deferTypecheck _SA_annotes0(SAL_deferTypecheck) + +#define _SA_SPECSTRIZE( x ) #x +#define _SAL_nop_impl_ /* nothing */ +#define __nop_impl(x) x +#endif + + +#if _USE_ATTRIBUTES_FOR_SAL // [ + +// Using attributes for sal + +#include "codeanalysis\sourceannotations.h" + + +#define _SA_annotes0(n) [SAL_annotes(Name=#n)] +#define _SA_annotes1(n,pp1) [SAL_annotes(Name=#n, p1=_SA_SPECSTRIZE(pp1))] +#define _SA_annotes2(n,pp1,pp2) [SAL_annotes(Name=#n, p1=_SA_SPECSTRIZE(pp1), p2=_SA_SPECSTRIZE(pp2))] +#define _SA_annotes3(n,pp1,pp2,pp3) [SAL_annotes(Name=#n, p1=_SA_SPECSTRIZE(pp1), p2=_SA_SPECSTRIZE(pp2), p3=_SA_SPECSTRIZE(pp3))] + +#define _Pre_impl_ [SAL_pre] +#define _Post_impl_ [SAL_post] +#define _Deref_impl_ [SAL_deref] +#define _Notref_impl_ [SAL_notref] + + +// Declare a function to be an annotation or primop (respectively). +// Done this way so that they don't appear in the regular compiler's +// namespace. +#define __ANNOTATION(fun) _SA_annotes0(SAL_annotation) void __SA_##fun; +#define __PRIMOP(type, fun) _SA_annotes0(SAL_primop) type __SA_##fun; +#define __QUALIFIER(fun) _SA_annotes0(SAL_qualifier) void __SA_##fun; + +// Benign declspec needed here for WindowsPREfast +#define __In_impl_ [SA_Pre(Valid=SA_Yes)] [SA_Pre(Deref=1, Notref=1, Access=SA_Read)] __declspec("SAL_pre SAL_valid") + +#elif _USE_DECLSPECS_FOR_SAL // ][ + +// Using declspecs for sal + +#define _SA_annotes0(n) __declspec(#n) +#define _SA_annotes1(n,pp1) __declspec(#n "(" _SA_SPECSTRIZE(pp1) ")" ) +#define _SA_annotes2(n,pp1,pp2) __declspec(#n "(" _SA_SPECSTRIZE(pp1) "," _SA_SPECSTRIZE(pp2) ")") +#define _SA_annotes3(n,pp1,pp2,pp3) __declspec(#n "(" _SA_SPECSTRIZE(pp1) "," _SA_SPECSTRIZE(pp2) "," _SA_SPECSTRIZE(pp3) ")") + +#define _Pre_impl_ _SA_annotes0(SAL_pre) +#define _Post_impl_ _SA_annotes0(SAL_post) +#define _Deref_impl_ _SA_annotes0(SAL_deref) +#define _Notref_impl_ _SA_annotes0(SAL_notref) + +// Declare a function to be an annotation or primop (respectively). +// Done this way so that they don't appear in the regular compiler's +// namespace. +#define __ANNOTATION(fun) _SA_annotes0(SAL_annotation) void __SA_##fun + +#define __PRIMOP(type, fun) _SA_annotes0(SAL_primop) type __SA_##fun + +#define __QUALIFIER(fun) _SA_annotes0(SAL_qualifier) void __SA_##fun; + +#define __In_impl_ _Pre_impl_ _SA_annotes0(SAL_valid) _Pre_impl_ _Deref_impl_ _Notref_impl_ _SA_annotes0(SAL_readonly) + +#else // ][ + +// Using "nothing" for sal + +#define _SA_annotes0(n) +#define _SA_annotes1(n,pp1) +#define _SA_annotes2(n,pp1,pp2) +#define _SA_annotes3(n,pp1,pp2,pp3) + +#define __ANNOTATION(fun) +#define __PRIMOP(type, fun) +#define __QUALIFIER(type, fun) + +#endif // ] + +#if _USE_ATTRIBUTES_FOR_SAL || _USE_DECLSPECS_FOR_SAL // [ + +// Declare annotations that need to be declared. +__ANNOTATION(SAL_useHeader(void)); +__ANNOTATION(SAL_bound(void)); +__ANNOTATION(SAL_allocator(void)); //??? 
resolve with PFD +__ANNOTATION(SAL_file_parser(__AuToQuOtE __In_impl_ char *, __In_impl_ char *)); +__ANNOTATION(SAL_source_code_content(__In_impl_ char *)); +__ANNOTATION(SAL_analysisHint(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_untrusted_data_source(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_untrusted_data_source_this(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_validated(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_validated_this(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_encoded(void)); +__ANNOTATION(SAL_adt(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_add_adt_property(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_remove_adt_property(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_transfer_adt_property_from(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_post_type(__AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_volatile(void)); +__ANNOTATION(SAL_nonvolatile(void)); +__ANNOTATION(SAL_entrypoint(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); +__ANNOTATION(SAL_blocksOn(__In_impl_ void*)); +__ANNOTATION(SAL_mustInspect(void)); + +// Only appears in model files, but needs to be declared. +__ANNOTATION(SAL_TypeName(__AuToQuOtE __In_impl_ char *)); + +// To be declared well-known soon. +__ANNOTATION(SAL_interlocked(void);) + +#pragma warning (suppress: 28227 28241) +__ANNOTATION(SAL_name(__In_impl_ char *, __In_impl_ char *, __In_impl_ char *);) + +__PRIMOP(char *, _Macro_value_(__In_impl_ char *)); +__PRIMOP(int, _Macro_defined_(__In_impl_ char *)); +__PRIMOP(char *, _Strstr_(__In_impl_ char *, __In_impl_ char *)); + +#endif // ] + +#if _USE_ATTRIBUTES_FOR_SAL // [ + +#define _Check_return_impl_ [SA_Post(MustCheck=SA_Yes)] + +#define _Success_impl_(expr) [SA_Success(Condition=#expr)] +#define _On_failure_impl_(annos) [SAL_context(p1="SAL_failed")] _Group_(_Post_impl_ _Group_(annos _SAL_nop_impl_)) + +#define _Printf_format_string_impl_ [SA_FormatString(Style="printf")] +#define _Scanf_format_string_impl_ [SA_FormatString(Style="scanf")] +#define _Scanf_s_format_string_impl_ [SA_FormatString(Style="scanf_s")] + +#define _In_bound_impl_ [SA_PreBound(Deref=0)] +#define _Out_bound_impl_ [SA_PostBound(Deref=0)] +#define _Ret_bound_impl_ [SA_PostBound(Deref=0)] +#define _Deref_in_bound_impl_ [SA_PreBound(Deref=1)] +#define _Deref_out_bound_impl_ [SA_PostBound(Deref=1)] +#define _Deref_ret_bound_impl_ [SA_PostBound(Deref=1)] + +#define __valid_impl Valid=SA_Yes +#define __maybevalid_impl Valid=SA_Maybe +#define __notvalid_impl Valid=SA_No + +#define __null_impl Null=SA_Yes +#define __maybenull_impl Null=SA_Maybe +#define __notnull_impl Null=SA_No + +#define __null_impl_notref Null=SA_Yes,Notref=1 +#define __maybenull_impl_notref Null=SA_Maybe,Notref=1 +#define __notnull_impl_notref Null=SA_No,Notref=1 + +#define __zterm_impl NullTerminated=SA_Yes +#define __maybezterm_impl NullTerminated=SA_Maybe +#define __maybzterm_impl NullTerminated=SA_Maybe +#define __notzterm_impl NullTerminated=SA_No + +#define __readaccess_impl Access=SA_Read +#define __writeaccess_impl Access=SA_Write +#define __allaccess_impl Access=SA_ReadWrite + +#define __readaccess_impl_notref Access=SA_Read,Notref=1 +#define __writeaccess_impl_notref Access=SA_Write,Notref=1 +#define __allaccess_impl_notref Access=SA_ReadWrite,Notref=1 + +#if _MSC_VER >= 1610 /*IFSTRIP=IGN*/ // [ + +// For SAL2, we need to expect general expressions. 
+ +#define __cap_impl(size) WritableElements="\n"#size +#define __bytecap_impl(size) WritableBytes="\n"#size +#define __bytecount_impl(size) ValidBytes="\n"#size +#define __count_impl(size) ValidElements="\n"#size + +#else // ][ + +#define __cap_impl(size) WritableElements=#size +#define __bytecap_impl(size) WritableBytes=#size +#define __bytecount_impl(size) ValidBytes=#size +#define __count_impl(size) ValidElements=#size + +#endif // ] + +#define __cap_c_impl(size) WritableElementsConst=size +#define __cap_c_one_notref_impl WritableElementsConst=1,Notref=1 +#define __cap_for_impl(param) WritableElementsLength=#param +#define __cap_x_impl(size) WritableElements="\n@"#size + +#define __bytecap_c_impl(size) WritableBytesConst=size +#define __bytecap_x_impl(size) WritableBytes="\n@"#size + +#define __mult_impl(mult,size) __cap_impl((mult)*(size)) + +#define __count_c_impl(size) ValidElementsConst=size +#define __count_x_impl(size) ValidElements="\n@"#size + +#define __bytecount_c_impl(size) ValidBytesConst=size +#define __bytecount_x_impl(size) ValidBytes="\n@"#size + + +#define _At_impl_(target, annos) [SAL_at(p1=#target)] _Group_(annos) +#define _At_buffer_impl_(target, iter, bound, annos) [SAL_at_buffer(p1=#target, p2=#iter, p3=#bound)] _Group_(annos) +#define _When_impl_(expr, annos) [SAL_when(p1=#expr)] _Group_(annos) + +#define _Group_impl_(annos) [SAL_begin] annos [SAL_end] +#define _GrouP_impl_(annos) [SAL_BEGIN] annos [SAL_END] + +#define _Use_decl_anno_impl_ _SA_annotes0(SAL_useHeader) // this is a special case! + +#define _Pre1_impl_(p1) [SA_Pre(p1)] +#define _Pre2_impl_(p1,p2) [SA_Pre(p1,p2)] +#define _Pre3_impl_(p1,p2,p3) [SA_Pre(p1,p2,p3)] + +#define _Post1_impl_(p1) [SA_Post(p1)] +#define _Post2_impl_(p1,p2) [SA_Post(p1,p2)] +#define _Post3_impl_(p1,p2,p3) [SA_Post(p1,p2,p3)] + +#define _Ret1_impl_(p1) [SA_Post(p1)] +#define _Ret2_impl_(p1,p2) [SA_Post(p1,p2)] +#define _Ret3_impl_(p1,p2,p3) [SA_Post(p1,p2,p3)] + +#define _Deref_pre1_impl_(p1) [SA_Pre(Deref=1,p1)] +#define _Deref_pre2_impl_(p1,p2) [SA_Pre(Deref=1,p1,p2)] +#define _Deref_pre3_impl_(p1,p2,p3) [SA_Pre(Deref=1,p1,p2,p3)] + + +#define _Deref_post1_impl_(p1) [SA_Post(Deref=1,p1)] +#define _Deref_post2_impl_(p1,p2) [SA_Post(Deref=1,p1,p2)] +#define _Deref_post3_impl_(p1,p2,p3) [SA_Post(Deref=1,p1,p2,p3)] + +#define _Deref_ret1_impl_(p1) [SA_Post(Deref=1,p1)] +#define _Deref_ret2_impl_(p1,p2) [SA_Post(Deref=1,p1,p2)] +#define _Deref_ret3_impl_(p1,p2,p3) [SA_Post(Deref=1,p1,p2,p3)] + +#define _Deref2_pre1_impl_(p1) [SA_Pre(Deref=2,Notref=1,p1)] +#define _Deref2_post1_impl_(p1) [SA_Post(Deref=2,Notref=1,p1)] +#define _Deref2_ret1_impl_(p1) [SA_Post(Deref=2,Notref=1,p1)] + +// Obsolete -- may be needed for transition to attributes. 
+#define __inner_typefix(ctype) [SAL_typefix(p1=_SA_SPECSTRIZE(ctype))] +#define __inner_exceptthat [SAL_except] + + +#elif _USE_DECLSPECS_FOR_SAL // ][ + +#define _Check_return_impl_ __post _SA_annotes0(SAL_checkReturn) + +#define _Success_impl_(expr) _SA_annotes1(SAL_success, expr) +#define _On_failure_impl_(annos) _SA_annotes1(SAL_context, SAL_failed) _Group_(_Post_impl_ _Group_(_SAL_nop_impl_ annos)) + +#define _Printf_format_string_impl_ _SA_annotes1(SAL_IsFormatString, "printf") +#define _Scanf_format_string_impl_ _SA_annotes1(SAL_IsFormatString, "scanf") +#define _Scanf_s_format_string_impl_ _SA_annotes1(SAL_IsFormatString, "scanf_s") + +#define _In_bound_impl_ _Pre_impl_ _Bound_impl_ +#define _Out_bound_impl_ _Post_impl_ _Bound_impl_ +#define _Ret_bound_impl_ _Post_impl_ _Bound_impl_ +#define _Deref_in_bound_impl_ _Deref_pre_impl_ _Bound_impl_ +#define _Deref_out_bound_impl_ _Deref_post_impl_ _Bound_impl_ +#define _Deref_ret_bound_impl_ _Deref_post_impl_ _Bound_impl_ + + +#define __null_impl _SA_annotes0(SAL_null) // _SA_annotes1(SAL_null, __yes) +#define __notnull_impl _SA_annotes0(SAL_notnull) // _SA_annotes1(SAL_null, __no) +#define __maybenull_impl _SA_annotes0(SAL_maybenull) // _SA_annotes1(SAL_null, __maybe) + +#define __valid_impl _SA_annotes0(SAL_valid) // _SA_annotes1(SAL_valid, __yes) +#define __notvalid_impl _SA_annotes0(SAL_notvalid) // _SA_annotes1(SAL_valid, __no) +#define __maybevalid_impl _SA_annotes0(SAL_maybevalid) // _SA_annotes1(SAL_valid, __maybe) + +#define __null_impl_notref _Notref_ _Null_impl_ +#define __maybenull_impl_notref _Notref_ _Maybenull_impl_ +#define __notnull_impl_notref _Notref_ _Notnull_impl_ + +#define __zterm_impl _SA_annotes1(SAL_nullTerminated, __yes) +#define __maybezterm_impl _SA_annotes1(SAL_nullTerminated, __maybe) +#define __maybzterm_impl _SA_annotes1(SAL_nullTerminated, __maybe) +#define __notzterm_impl _SA_annotes1(SAL_nullTerminated, __no) + +#define __readaccess_impl _SA_annotes1(SAL_access, 0x1) +#define __writeaccess_impl _SA_annotes1(SAL_access, 0x2) +#define __allaccess_impl _SA_annotes1(SAL_access, 0x3) + +#define __readaccess_impl_notref _Notref_ _SA_annotes1(SAL_access, 0x1) +#define __writeaccess_impl_notref _Notref_ _SA_annotes1(SAL_access, 0x2) +#define __allaccess_impl_notref _Notref_ _SA_annotes1(SAL_access, 0x3) + +#define __cap_impl(size) _SA_annotes1(SAL_writableTo,elementCount(size)) +#define __cap_c_impl(size) _SA_annotes1(SAL_writableTo,elementCount(size)) +#define __cap_c_one_notref_impl _Notref_ _SA_annotes1(SAL_writableTo,elementCount(1)) +#define __cap_for_impl(param) _SA_annotes1(SAL_writableTo,inexpressibleCount(sizeof(param))) +#define __cap_x_impl(size) _SA_annotes1(SAL_writableTo,inexpressibleCount(#size)) + +#define __bytecap_impl(size) _SA_annotes1(SAL_writableTo,byteCount(size)) +#define __bytecap_c_impl(size) _SA_annotes1(SAL_writableTo,byteCount(size)) +#define __bytecap_x_impl(size) _SA_annotes1(SAL_writableTo,inexpressibleCount(#size)) + +#define __mult_impl(mult,size) _SA_annotes1(SAL_writableTo,(mult)*(size)) + +#define __count_impl(size) _SA_annotes1(SAL_readableTo,elementCount(size)) +#define __count_c_impl(size) _SA_annotes1(SAL_readableTo,elementCount(size)) +#define __count_x_impl(size) _SA_annotes1(SAL_readableTo,inexpressibleCount(#size)) + +#define __bytecount_impl(size) _SA_annotes1(SAL_readableTo,byteCount(size)) +#define __bytecount_c_impl(size) _SA_annotes1(SAL_readableTo,byteCount(size)) +#define __bytecount_x_impl(size) _SA_annotes1(SAL_readableTo,inexpressibleCount(#size)) + 
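+// For orientation, a rough sketch (not normative) of how the declspec forms
+// above expand: __count_impl(10) goes through
+// _SA_annotes1(SAL_readableTo, elementCount(10)) and becomes approximately
+//   __declspec("SAL_readableTo" "(" "elementCount(10)" ")")
+// whose adjacent string literals concatenate to
+//   __declspec("SAL_readableTo(elementCount(10))")
+// a string declspec that the analysis tools then interpret.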
+#define _At_impl_(target, annos) _SA_annotes0(SAL_at(target)) _Group_(annos) +#define _At_buffer_impl_(target, iter, bound, annos) _SA_annotes3(SAL_at_buffer, target, iter, bound) _Group_(annos) +#define _Group_impl_(annos) _SA_annotes0(SAL_begin) annos _SA_annotes0(SAL_end) +#define _GrouP_impl_(annos) _SA_annotes0(SAL_BEGIN) annos _SA_annotes0(SAL_END) +#define _When_impl_(expr, annos) _SA_annotes0(SAL_when(expr)) _Group_(annos) + +#define _Use_decl_anno_impl_ __declspec("SAL_useHeader()") // this is a special case! + +#define _Pre1_impl_(p1) _Pre_impl_ p1 +#define _Pre2_impl_(p1,p2) _Pre_impl_ p1 _Pre_impl_ p2 +#define _Pre3_impl_(p1,p2,p3) _Pre_impl_ p1 _Pre_impl_ p2 _Pre_impl_ p3 + +#define _Post1_impl_(p1) _Post_impl_ p1 +#define _Post2_impl_(p1,p2) _Post_impl_ p1 _Post_impl_ p2 +#define _Post3_impl_(p1,p2,p3) _Post_impl_ p1 _Post_impl_ p2 _Post_impl_ p3 + +#define _Ret1_impl_(p1) _Post_impl_ p1 +#define _Ret2_impl_(p1,p2) _Post_impl_ p1 _Post_impl_ p2 +#define _Ret3_impl_(p1,p2,p3) _Post_impl_ p1 _Post_impl_ p2 _Post_impl_ p3 + +#define _Deref_pre1_impl_(p1) _Deref_pre_impl_ p1 +#define _Deref_pre2_impl_(p1,p2) _Deref_pre_impl_ p1 _Deref_pre_impl_ p2 +#define _Deref_pre3_impl_(p1,p2,p3) _Deref_pre_impl_ p1 _Deref_pre_impl_ p2 _Deref_pre_impl_ p3 + +#define _Deref_post1_impl_(p1) _Deref_post_impl_ p1 +#define _Deref_post2_impl_(p1,p2) _Deref_post_impl_ p1 _Deref_post_impl_ p2 +#define _Deref_post3_impl_(p1,p2,p3) _Deref_post_impl_ p1 _Deref_post_impl_ p2 _Deref_post_impl_ p3 + +#define _Deref_ret1_impl_(p1) _Deref_post_impl_ p1 +#define _Deref_ret2_impl_(p1,p2) _Deref_post_impl_ p1 _Deref_post_impl_ p2 +#define _Deref_ret3_impl_(p1,p2,p3) _Deref_post_impl_ p1 _Deref_post_impl_ p2 _Deref_post_impl_ p3 + +#define _Deref2_pre1_impl_(p1) _Deref_pre_impl_ _Notref_impl_ _Deref_impl_ p1 +#define _Deref2_post1_impl_(p1) _Deref_post_impl_ _Notref_impl_ _Deref_impl_ p1 +#define _Deref2_ret1_impl_(p1) _Deref_post_impl_ _Notref_impl_ _Deref_impl_ p1 + +#define __inner_typefix(ctype) _SA_annotes1(SAL_typefix, ctype) +#define __inner_exceptthat _SA_annotes0(SAL_except) + +#elif defined(_MSC_EXTENSIONS) && !defined( MIDL_PASS ) && !defined(__midl) && !defined(RC_INVOKED) && defined(_PFT_VER) && _MSC_VER >= 1400 /*IFSTRIP=IGN*/ // ][ + +// minimum attribute expansion for foreground build + +#pragma push_macro( "SA" ) +#pragma push_macro( "REPEATABLE" ) + +#ifdef __cplusplus // [ +#define SA( id ) id +#define REPEATABLE [repeatable] +#else // !__cplusplus // ][ +#define SA( id ) SA_##id +#define REPEATABLE +#endif // !__cplusplus // ] + +REPEATABLE +[source_annotation_attribute( SA( Parameter ) )] +struct __P_impl +{ +#ifdef __cplusplus // [ + __P_impl(); +#endif // ] + int __d_; +}; +typedef struct __P_impl __P_impl; + +REPEATABLE +[source_annotation_attribute( SA( ReturnValue ) )] +struct __R_impl +{ +#ifdef __cplusplus // [ + __R_impl(); +#endif // ] + int __d_; +}; +typedef struct __R_impl __R_impl; + +[source_annotation_attribute( SA( Method ) )] +struct __M_ +{ +#ifdef __cplusplus // [ + __M_(); +#endif // ] + int __d_; +}; +typedef struct __M_ __M_; + +[source_annotation_attribute( SA( All ) )] +struct __A_ +{ +#ifdef __cplusplus // [ + __A_(); +#endif // ] + int __d_; +}; +typedef struct __A_ __A_; + +[source_annotation_attribute( SA( Field ) )] +struct __F_ +{ +#ifdef __cplusplus // [ + __F_(); +#endif // ] + int __d_; +}; +typedef struct __F_ __F_; + +#pragma pop_macro( "REPEATABLE" ) +#pragma pop_macro( "SA" ) + + +#define _SAL_nop_impl_ + +#define _At_impl_(target, annos) [__A_(__d_=0)] 
+#define _At_buffer_impl_(target, iter, bound, annos) [__A_(__d_=0)] +#define _When_impl_(expr, annos) annos +#define _Group_impl_(annos) annos +#define _GrouP_impl_(annos) annos +#define _Use_decl_anno_impl_ [__M_(__d_=0)] + +#define _Points_to_data_impl_ [__P_impl(__d_=0)] +#define _Literal_impl_ [__P_impl(__d_=0)] +#define _Notliteral_impl_ [__P_impl(__d_=0)] + +#define _Pre_valid_impl_ [__P_impl(__d_=0)] +#define _Post_valid_impl_ [__P_impl(__d_=0)] +#define _Ret_valid_impl_ [__R_impl(__d_=0)] + +#define _Check_return_impl_ [__R_impl(__d_=0)] +#define _Must_inspect_impl_ [__R_impl(__d_=0)] + +#define _Success_impl_(expr) [__M_(__d_=0)] +#define _On_failure_impl_(expr) [__M_(__d_=0)] +#define _Always_impl_(expr) [__M_(__d_=0)] + +#define _Printf_format_string_impl_ [__P_impl(__d_=0)] +#define _Scanf_format_string_impl_ [__P_impl(__d_=0)] +#define _Scanf_s_format_string_impl_ [__P_impl(__d_=0)] + +#define _Raises_SEH_exception_impl_ [__M_(__d_=0)] +#define _Maybe_raises_SEH_exception_impl_ [__M_(__d_=0)] + +#define _In_bound_impl_ [__P_impl(__d_=0)] +#define _Out_bound_impl_ [__P_impl(__d_=0)] +#define _Ret_bound_impl_ [__R_impl(__d_=0)] +#define _Deref_in_bound_impl_ [__P_impl(__d_=0)] +#define _Deref_out_bound_impl_ [__P_impl(__d_=0)] +#define _Deref_ret_bound_impl_ [__R_impl(__d_=0)] + +#define _Range_impl_(min,max) [__P_impl(__d_=0)] +#define _In_range_impl_(min,max) [__P_impl(__d_=0)] +#define _Out_range_impl_(min,max) [__P_impl(__d_=0)] +#define _Ret_range_impl_(min,max) [__R_impl(__d_=0)] +#define _Deref_in_range_impl_(min,max) [__P_impl(__d_=0)] +#define _Deref_out_range_impl_(min,max) [__P_impl(__d_=0)] +#define _Deref_ret_range_impl_(min,max) [__R_impl(__d_=0)] + +#define _Field_range_impl_(min,max) [__F_(__d_=0)] + +#define _Pre_satisfies_impl_(cond) [__A_(__d_=0)] +#define _Post_satisfies_impl_(cond) [__A_(__d_=0)] +#define _Satisfies_impl_(cond) [__A_(__d_=0)] + +#define _Null_impl_ [__A_(__d_=0)] +#define _Notnull_impl_ [__A_(__d_=0)] +#define _Maybenull_impl_ [__A_(__d_=0)] + +#define _Valid_impl_ [__A_(__d_=0)] +#define _Notvalid_impl_ [__A_(__d_=0)] +#define _Maybevalid_impl_ [__A_(__d_=0)] + +#define _Readable_bytes_impl_(size) [__A_(__d_=0)] +#define _Readable_elements_impl_(size) [__A_(__d_=0)] +#define _Writable_bytes_impl_(size) [__A_(__d_=0)] +#define _Writable_elements_impl_(size) [__A_(__d_=0)] + +#define _Null_terminated_impl_ [__A_(__d_=0)] +#define _NullNull_terminated_impl_ [__A_(__d_=0)] + +#define _Pre_impl_ [__P_impl(__d_=0)] +#define _Pre1_impl_(p1) [__P_impl(__d_=0)] +#define _Pre2_impl_(p1,p2) [__P_impl(__d_=0)] +#define _Pre3_impl_(p1,p2,p3) [__P_impl(__d_=0)] + +#define _Post_impl_ [__P_impl(__d_=0)] +#define _Post1_impl_(p1) [__P_impl(__d_=0)] +#define _Post2_impl_(p1,p2) [__P_impl(__d_=0)] +#define _Post3_impl_(p1,p2,p3) [__P_impl(__d_=0)] + +#define _Ret1_impl_(p1) [__R_impl(__d_=0)] +#define _Ret2_impl_(p1,p2) [__R_impl(__d_=0)] +#define _Ret3_impl_(p1,p2,p3) [__R_impl(__d_=0)] + +#define _Deref_pre1_impl_(p1) [__P_impl(__d_=0)] +#define _Deref_pre2_impl_(p1,p2) [__P_impl(__d_=0)] +#define _Deref_pre3_impl_(p1,p2,p3) [__P_impl(__d_=0)] + +#define _Deref_post1_impl_(p1) [__P_impl(__d_=0)] +#define _Deref_post2_impl_(p1,p2) [__P_impl(__d_=0)] +#define _Deref_post3_impl_(p1,p2,p3) [__P_impl(__d_=0)] + +#define _Deref_ret1_impl_(p1) [__R_impl(__d_=0)] +#define _Deref_ret2_impl_(p1,p2) [__R_impl(__d_=0)] +#define _Deref_ret3_impl_(p1,p2,p3) [__R_impl(__d_=0)] + +#define _Deref2_pre1_impl_(p1) //[__P_impl(__d_=0)] +#define _Deref2_post1_impl_(p1) 
//[__P_impl(__d_=0)] +#define _Deref2_ret1_impl_(p1) //[__P_impl(__d_=0)] + +#else // ][ + + +#define _SAL_nop_impl_ X + +#define _At_impl_(target, annos) +#define _When_impl_(expr, annos) +#define _Group_impl_(annos) +#define _GrouP_impl_(annos) +#define _At_buffer_impl_(target, iter, bound, annos) +#define _Use_decl_anno_impl_ +#define _Points_to_data_impl_ +#define _Literal_impl_ +#define _Notliteral_impl_ +#define _Notref_impl_ + +#define _Pre_valid_impl_ +#define _Post_valid_impl_ +#define _Ret_valid_impl_ + +#define _Check_return_impl_ +#define _Must_inspect_impl_ + +#define _Success_impl_(expr) +#define _On_failure_impl_(annos) +#define _Always_impl_(annos) + +#define _Printf_format_string_impl_ +#define _Scanf_format_string_impl_ +#define _Scanf_s_format_string_impl_ + +#define _In_bound_impl_ +#define _Out_bound_impl_ +#define _Ret_bound_impl_ +#define _Deref_in_bound_impl_ +#define _Deref_out_bound_impl_ +#define _Deref_ret_bound_impl_ + +#define _Range_impl_(min,max) +#define _In_range_impl_(min,max) +#define _Out_range_impl_(min,max) +#define _Ret_range_impl_(min,max) +#define _Deref_in_range_impl_(min,max) +#define _Deref_out_range_impl_(min,max) +#define _Deref_ret_range_impl_(min,max) + +#define _Satisfies_impl_(expr) +#define _Pre_satisfies_impl_(expr) +#define _Post_satisfies_impl_(expr) + +#define _Null_impl_ +#define _Notnull_impl_ +#define _Maybenull_impl_ + +#define _Valid_impl_ +#define _Notvalid_impl_ +#define _Maybevalid_impl_ + +#define _Field_range_impl_(min,max) + +#define _Pre_impl_ +#define _Pre1_impl_(p1) +#define _Pre2_impl_(p1,p2) +#define _Pre3_impl_(p1,p2,p3) + +#define _Post_impl_ +#define _Post1_impl_(p1) +#define _Post2_impl_(p1,p2) +#define _Post3_impl_(p1,p2,p3) + +#define _Ret1_impl_(p1) +#define _Ret2_impl_(p1,p2) +#define _Ret3_impl_(p1,p2,p3) + +#define _Deref_pre1_impl_(p1) +#define _Deref_pre2_impl_(p1,p2) +#define _Deref_pre3_impl_(p1,p2,p3) + +#define _Deref_post1_impl_(p1) +#define _Deref_post2_impl_(p1,p2) +#define _Deref_post3_impl_(p1,p2,p3) + +#define _Deref_ret1_impl_(p1) +#define _Deref_ret2_impl_(p1,p2) +#define _Deref_ret3_impl_(p1,p2,p3) + +#define _Deref2_pre1_impl_(p1) +#define _Deref2_post1_impl_(p1) +#define _Deref2_ret1_impl_(p1) + +#define _Readable_bytes_impl_(size) +#define _Readable_elements_impl_(size) +#define _Writable_bytes_impl_(size) +#define _Writable_elements_impl_(size) + +#define _Null_terminated_impl_ +#define _NullNull_terminated_impl_ + +// Obsolete -- may be needed for transition to attributes. +#define __inner_typefix(ctype) +#define __inner_exceptthat + +#endif // ] + +// This section contains the deprecated annotations + +/* + ------------------------------------------------------------------------------- + Introduction + + sal.h provides a set of annotations to describe how a function uses its + parameters - the assumptions it makes about them, and the guarantees it makes + upon finishing. + + Annotations may be placed before either a function parameter's type or its return + type, and describe the function's behavior regarding the parameter or return value. + There are two classes of annotations: buffer annotations and advanced annotations. + Buffer annotations describe how functions use their pointer parameters, and + advanced annotations either describe complex/unusual buffer behavior, or provide + additional information about a parameter that is not otherwise expressible. 
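+    As a quick illustration (the names below are placeholders only), an
+    annotation is written immediately before the item it describes:
+
+        __checkReturn HRESULT Frobnicate( __in_ecount(cch) const WCHAR* pwsz, size_t cch );
+
+    Here __in_ecount(cch) is a buffer annotation on the parameter pwsz, and
+    __checkReturn is an advanced annotation on the return value.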
+ + ------------------------------------------------------------------------------- + Buffer Annotations + + The most important annotations in sal.h provide a consistent way to annotate + buffer parameters or return values for a function. Each of these annotations describes + a single buffer (which could be a string, a fixed-length or variable-length array, + or just a pointer) that the function interacts with: where it is, how large it is, + how much is initialized, and what the function does with it. + + The appropriate macro for a given buffer can be constructed using the table below. + Just pick the appropriate values from each category, and combine them together + with a leading underscore. Some combinations of values do not make sense as buffer + annotations. Only meaningful annotations can be added to your code; for a list of + these, see the buffer annotation definitions section. + + Only a single buffer annotation should be used for each parameter. + + |------------|------------|---------|--------|----------|----------|---------------| + | Level | Usage | Size | Output | NullTerm | Optional | Parameters | + |------------|------------|---------|--------|----------|----------|---------------| + | <> | <> | <> | <> | _z | <> | <> | + | _deref | _in | _ecount | _full | _nz | _opt | (size) | + | _deref_opt | _out | _bcount | _part | | | (size,length) | + | | _inout | | | | | | + | | | | | | | | + |------------|------------|---------|--------|----------|----------|---------------| + + Level: Describes the buffer pointer's level of indirection from the parameter or + return value 'p'. + + <> : p is the buffer pointer. + _deref : *p is the buffer pointer. p must not be NULL. + _deref_opt : *p may be the buffer pointer. p may be NULL, in which case the rest of + the annotation is ignored. + + Usage: Describes how the function uses the buffer. + + <> : The buffer is not accessed. If used on the return value or with _deref, the + function will provide the buffer, and it will be uninitialized at exit. + Otherwise, the caller must provide the buffer. This should only be used + for alloc and free functions. + _in : The function will only read from the buffer. The caller must provide the + buffer and initialize it. Cannot be used with _deref. + _out : The function will only write to the buffer. If used on the return value or + with _deref, the function will provide the buffer and initialize it. + Otherwise, the caller must provide the buffer, and the function will + initialize it. + _inout : The function may freely read from and write to the buffer. The caller must + provide the buffer and initialize it. If used with _deref, the buffer may + be reallocated by the function. + + Size: Describes the total size of the buffer. This may be less than the space actually + allocated for the buffer, in which case it describes the accessible amount. + + <> : No buffer size is given. If the type specifies the buffer size (such as + with LPSTR and LPWSTR), that amount is used. Otherwise, the buffer is one + element long. Must be used with _in, _out, or _inout. + _ecount : The buffer size is an explicit element count. + _bcount : The buffer size is an explicit byte count. + + Output: Describes how much of the buffer will be initialized by the function. For + _inout buffers, this also describes how much is initialized at entry. Omit this + category for _in buffers; they must be fully initialized by the caller. + + <> : The type specifies how much is initialized. 
For instance, a function initializing + an LPWSTR must NULL-terminate the string. + _full : The function initializes the entire buffer. + _part : The function initializes part of the buffer, and explicitly indicates how much. + + NullTerm: States if the present of a '\0' marks the end of valid elements in the buffer. + _z : A '\0' indicated the end of the buffer + _nz : The buffer may not be null terminated and a '\0' does not indicate the end of the + buffer. + Optional: Describes if the buffer itself is optional. + + <> : The pointer to the buffer must not be NULL. + _opt : The pointer to the buffer might be NULL. It will be checked before being dereferenced. + + Parameters: Gives explicit counts for the size and length of the buffer. + + <> : There is no explicit count. Use when neither _ecount nor _bcount is used. + (size) : Only the buffer's total size is given. Use with _ecount or _bcount but not _part. + (size,length) : The buffer's total size and initialized length are given. Use with _ecount_part + and _bcount_part. + + ------------------------------------------------------------------------------- + Buffer Annotation Examples + + LWSTDAPI_(BOOL) StrToIntExA( + __in LPCSTR pszString, + DWORD dwFlags, + __out int *piRet -- A pointer whose dereference will be filled in. + ); + + void MyPaintingFunction( + __in HWND hwndControl, -- An initialized read-only parameter. + __in_opt HDC hdcOptional, -- An initialized read-only parameter that might be NULL. + __inout IPropertyStore *ppsStore -- An initialized parameter that may be freely used + -- and modified. + ); + + LWSTDAPI_(BOOL) PathCompactPathExA( + __out_ecount(cchMax) LPSTR pszOut, -- A string buffer with cch elements that will + -- be NULL terminated on exit. + __in LPCSTR pszSrc, + UINT cchMax, + DWORD dwFlags + ); + + HRESULT SHLocalAllocBytes( + size_t cb, + __deref_bcount(cb) T **ppv -- A pointer whose dereference will be set to an + -- uninitialized buffer with cb bytes. + ); + + __inout_bcount_full(cb) : A buffer with cb elements that is fully initialized at + entry and exit, and may be written to by this function. + + __out_ecount_part(count, *countOut) : A buffer with count elements that will be + partially initialized by this function. The function indicates how much it + initialized by setting *countOut. + + ------------------------------------------------------------------------------- + Advanced Annotations + + Advanced annotations describe behavior that is not expressible with the regular + buffer macros. These may be used either to annotate buffer parameters that involve + complex or conditional behavior, or to enrich existing annotations with additional + information. + + __success(expr) f : + indicates whether function f succeeded or not. If is true at exit, + all the function's guarantees (as given by other annotations) must hold. If + is false at exit, the caller should not expect any of the function's guarantees + to hold. If not used, the function must always satisfy its guarantees. Added + automatically to functions that indicate success in standard ways, such as by + returning an HRESULT. + + __nullterminated p : + Pointer p is a buffer that may be read or written up to and including the first + NULL character or pointer. May be used on typedefs, which marks valid (properly + initialized) instances of that type as being NULL-terminated. + + __nullnullterminated p : + Pointer p is a buffer that may be read or written up to and including the first + sequence of two NULL characters or pointers. 
May be used on typedefs, which marks + valid instances of that type as being double-NULL terminated. + + __reserved v : + Value v must be 0/NULL, reserved for future use. + + __checkReturn v : + Return value v must not be ignored by callers of this function. + + __typefix(ctype) v : + Value v should be treated as an instance of ctype, rather than its declared type. + + __override f : + Specify C#-style 'override' behaviour for overriding virtual methods. + + __callback f : + Function f can be used as a function pointer. + + __format_string p : + Pointer p is a string that contains % markers in the style of printf. + + __blocksOn(resource) f : + Function f blocks on the resource 'resource'. + + __fallthrough : + Annotates switch statement labels where fall-through is desired, to distinguish + from forgotten break statements. + + ------------------------------------------------------------------------------- + Advanced Annotation Examples + + __success(return != FALSE) LWSTDAPI_(BOOL) + PathCanonicalizeA(__out_ecount(MAX_PATH) LPSTR pszBuf, LPCSTR pszPath) : + pszBuf is only guaranteed to be NULL-terminated when TRUE is returned. + + typedef __nullterminated WCHAR* LPWSTR : Initialized LPWSTRs are NULL-terminated strings. + + __out_ecount(cch) __typefix(LPWSTR) void *psz : psz is a buffer parameter which will be + a NULL-terminated WCHAR string at exit, and which initially contains cch WCHARs. + + ------------------------------------------------------------------------------- +*/ + +#define __specstrings + +#ifdef __cplusplus // [ +#ifndef __nothrow // [ +# define __nothrow __declspec(nothrow) +#endif // ] +extern "C" { +#else // ][ +#ifndef __nothrow // [ +# define __nothrow +#endif // ] +#endif /* #ifdef __cplusplus */ // ] + + +/* + ------------------------------------------------------------------------------- + Helper Macro Definitions + + These express behavior common to many of the high-level annotations. + DO NOT USE THESE IN YOUR CODE. + ------------------------------------------------------------------------------- +*/ + +/* + The helper annotations are only understood by the compiler version used by + various defect detection tools. When the regular compiler is running, they + are defined into nothing, and do not affect the compiled code. +*/ + +#if !defined(__midl) && defined(_PREFAST_) // [ + + /* + In the primitive "SAL_*" annotations "SAL" stands for Standard + Annotation Language. These "SAL_*" annotations are the + primitives the compiler understands and high-level MACROs + will decompose into these primivates. + */ + + #define _SA_SPECSTRIZE( x ) #x + + /* + __null p + __notnull p + __maybenull p + + Annotates a pointer p. States that pointer p is null. Commonly used + in the negated form __notnull or the possibly null form __maybenull. + */ + +#ifndef PAL_STDCPP_COMPAT + #define __null _Null_impl_ + #define __notnull _Notnull_impl_ + #define __maybenull _Maybenull_impl_ +#endif // !PAL_STDCPP_COMPAT + + /* + __readonly l + __notreadonly l + __mabyereadonly l + + Annotates a location l. States that location l is not modified after + this point. If the annotation is placed on the precondition state of + a function, the restriction only applies until the postcondition state + of the function. __maybereadonly states that the annotated location + may be modified, whereas __notreadonly states that a location must be + modified. 
+ */ + + #define __readonly _Pre1_impl_(__readaccess_impl) + #define __notreadonly _Pre1_impl_(__allaccess_impl) + #define __maybereadonly _Pre1_impl_(__readaccess_impl) + + /* + __valid v + __notvalid v + __maybevalid v + + Annotates any value v. States that the value satisfies all properties of + valid values of its type. For example, for a string buffer, valid means + that the buffer pointer is either NULL or points to a NULL-terminated string. + */ + + #define __valid _Valid_impl_ + #define __notvalid _Notvalid_impl_ + #define __maybevalid _Maybevalid_impl_ + + /* + __readableTo(extent) p + + Annotates a buffer pointer p. If the buffer can be read, extent describes + how much of the buffer is readable. For a reader of the buffer, this is + an explicit permission to read up to that amount, rather than a restriction to + read only up to it. + */ + + #define __readableTo(extent) _SA_annotes1(SAL_readableTo, extent) + + /* + + __elem_readableTo(size) + + Annotates a buffer pointer p as being readable to size elements. + */ + + #define __elem_readableTo(size) _SA_annotes1(SAL_readableTo, elementCount( size )) + + /* + __byte_readableTo(size) + + Annotates a buffer pointer p as being readable to size bytes. + */ + #define __byte_readableTo(size) _SA_annotes1(SAL_readableTo, byteCount(size)) + + /* + __writableTo(extent) p + + Annotates a buffer pointer p. If the buffer can be modified, extent + describes how much of the buffer is writable (usually the allocation + size). For a writer of the buffer, this is an explicit permission to + write up to that amount, rather than a restriction to write only up to it. + */ + #define __writableTo(size) _SA_annotes1(SAL_writableTo, size) + + /* + __elem_writableTo(size) + + Annotates a buffer pointer p as being writable to size elements. + */ + #define __elem_writableTo(size) _SA_annotes1(SAL_writableTo, elementCount( size )) + + /* + __byte_writableTo(size) + + Annotates a buffer pointer p as being writable to size bytes. + */ + #define __byte_writableTo(size) _SA_annotes1(SAL_writableTo, byteCount( size)) + + /* + __deref p + + Annotates a pointer p. The next annotation applies one dereference down + in the type. If readableTo(p, size) then the next annotation applies to + all elements *(p+i) for which i satisfies the size. If p is a pointer + to a struct, the next annotation applies to all fields of the struct. + */ + #define __deref _Deref_impl_ + + /* + __pre __next_annotation + + The next annotation applies in the precondition state + */ + #define __pre _Pre_impl_ + + /* + __post __next_annotation + + The next annotation applies in the postcondition state + */ + #define __post _Post_impl_ + + /* + __precond() + + When is true, the next annotation applies in the precondition state + (currently not enabled) + */ + #define __precond(expr) __pre + + /* + __postcond() + + When is true, the next annotation applies in the postcondition state + (currently not enabled) + */ + #define __postcond(expr) __post + + /* + __exceptthat + + Given a set of annotations Q containing __exceptthat maybeP, the effect of + the except clause is to erase any P or notP annotations (explicit or + implied) within Q at the same level of dereferencing that the except + clause appears, and to replace it with maybeP. + + Example 1: __valid __pre_except_maybenull on a pointer p means that the + pointer may be null, and is otherwise valid, thus overriding + the implicit notnull annotation implied by __valid on + pointers. 
+ + Example 2: __valid __deref __pre_except_maybenull on an int **p means + that p is not null (implied by valid), but the elements + pointed to by p could be null, and are otherwise valid. + */ + #define __exceptthat __inner_exceptthat + + /* + _refparam + + Added to all out parameter macros to indicate that they are all reference + parameters. + */ + #define __refparam _Notref_ __deref __notreadonly + + /* + __inner_* + + Helper macros that directly correspond to certain high-level annotations. + + */ + + /* + Macros to classify the entrypoints and indicate their category. + + Pre-defined control point categories include: RPC, LPC, DeviceDriver, UserToKernel, ISAPI, COM. + + */ + #define __inner_control_entrypoint(category) _SA_annotes2(SAL_entrypoint, controlEntry, category) + + + /* + Pre-defined data entry point categories include: Registry, File, Network. + */ + #define __inner_data_entrypoint(category) _SA_annotes2(SAL_entrypoint, dataEntry, category) + + #define __inner_override _SA_annotes0(__override) + #define __inner_callback _SA_annotes0(__callback) + #define __inner_blocksOn(resource) _SA_annotes1(SAL_blocksOn, resource) + #define __inner_fallthrough_dec __inline __nothrow void __FallThrough() {} + #define __inner_fallthrough __FallThrough(); + + #define __post_except_maybenull __post __inner_exceptthat _Maybenull_impl_ + #define __pre_except_maybenull __pre __inner_exceptthat _Maybenull_impl_ + + #define __post_deref_except_maybenull __post __deref __inner_exceptthat _Maybenull_impl_ + #define __pre_deref_except_maybenull __pre __deref __inner_exceptthat _Maybenull_impl_ + + #define __inexpressible_readableTo(size) _Readable_elements_impl_(_Inexpressible_(size)) + #define __inexpressible_writableTo(size) _Writable_elements_impl_(_Inexpressible_(size)) + + +#else // ][ +#ifndef PAL_STDCPP_COMPAT + #define __null + #define __notnull +#endif // !PAL_STDCPP_COMPAT + #define __maybenull + #define __readonly + #define __notreadonly + #define __maybereadonly + #define __valid + #define __notvalid + #define __maybevalid + #define __readableTo(extent) + #define __elem_readableTo(size) + #define __byte_readableTo(size) + #define __writableTo(size) + #define __elem_writableTo(size) + #define __byte_writableTo(size) + #define __deref + #define __pre + #define __post + #define __precond(expr) + #define __postcond(expr) + #define __exceptthat + #define __inner_override + #define __inner_callback + #define __inner_blocksOn(resource) + #define __inner_fallthrough_dec + #define __inner_fallthrough + #define __refparam + #define __inner_control_entrypoint(category) + #define __inner_data_entrypoint(category) + + #define __post_except_maybenull + #define __pre_except_maybenull + #define __post_deref_except_maybenull + #define __pre_deref_except_maybenull + + #define __inexpressible_readableTo(size) + #define __inexpressible_writableTo(size) + +#endif /* #if !defined(__midl) && defined(_PREFAST_) */ // ] + +/* +------------------------------------------------------------------------------- +Buffer Annotation Definitions + +Any of these may be used to directly annotate functions, but only one should +be used for each parameter. To determine which annotation to use for a given +buffer, use the table in the buffer annotations section. +------------------------------------------------------------------------------- +*/ + +// These macros conflict with c++ headers. 
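+// Illustrative sketch (not part of the original annotations): a hypothetical
+// declaration showing how the buffer macros defined below are typically
+// applied. The function, parameter names, and types are invented for this
+// example only.
+//
+//   HRESULT CopyNames(
+//       __in_ecount(cchSrc) const WCHAR *pszSrc,               -- caller provides cchSrc initialized elements
+//       size_t cchSrc,
+//       __out_ecount_part(cchDst, *pcchWritten) WCHAR *pszDst, -- callee fills at most cchDst elements,
+//       size_t cchDst,                                         -- reports how many via *pcchWritten
+//       __out size_t *pcchWritten);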
+#ifndef PAL_STDCPP_COMPAT +#define __in _SAL1_Source_(__in, (), _In_) +#define __out _SAL1_Source_(__out, (), _Out_) +#endif // !PAL_STDCPP_COMPAT + +#define __ecount(size) _SAL1_Source_(__ecount, (size), __notnull __elem_writableTo(size)) +#define __bcount(size) _SAL1_Source_(__bcount, (size), __notnull __byte_writableTo(size)) +#define __in_ecount(size) _SAL1_Source_(__in_ecount, (size), _In_reads_(size)) +#define __in_bcount(size) _SAL1_Source_(__in_bcount, (size), _In_reads_bytes_(size)) +#define __in_z _SAL1_Source_(__in_z, (), _In_z_) +#define __in_ecount_z(size) _SAL1_Source_(__in_ecount_z, (size), _In_reads_z_(size)) +#define __in_bcount_z(size) _SAL1_Source_(__in_bcount_z, (size), __in_bcount(size) __pre __nullterminated) +#define __in_nz _SAL1_Source_(__in_nz, (), __in) +#define __in_ecount_nz(size) _SAL1_Source_(__in_ecount_nz, (size), __in_ecount(size)) +#define __in_bcount_nz(size) _SAL1_Source_(__in_bcount_nz, (size), __in_bcount(size)) +#define __out_ecount(size) _SAL1_Source_(__out_ecount, (size), _Out_writes_(size)) +#define __out_bcount(size) _SAL1_Source_(__out_bcount, (size), _Out_writes_bytes_(size)) +#define __out_ecount_part(size,length) _SAL1_Source_(__out_ecount_part, (size,length), _Out_writes_to_(size,length)) +#define __out_bcount_part(size,length) _SAL1_Source_(__out_bcount_part, (size,length), _Out_writes_bytes_to_(size,length)) +#define __out_ecount_full(size) _SAL1_Source_(__out_ecount_full, (size), _Out_writes_all_(size)) +#define __out_bcount_full(size) _SAL1_Source_(__out_bcount_full, (size), _Out_writes_bytes_all_(size)) +#define __out_z _SAL1_Source_(__out_z, (), __post __valid __refparam __post __nullterminated) +#define __out_z_opt _SAL1_Source_(__out_z_opt, (), __post __valid __refparam __post __nullterminated __pre_except_maybenull) +#define __out_ecount_z(size) _SAL1_Source_(__out_ecount_z, (size), __ecount(size) __post __valid __refparam __post __nullterminated) +#define __out_bcount_z(size) _SAL1_Source_(__out_bcount_z, (size), __bcount(size) __post __valid __refparam __post __nullterminated) +#define __out_ecount_part_z(size,length) _SAL1_Source_(__out_ecount_part_z, (size,length), __out_ecount_part(size,length) __post __nullterminated) +#define __out_bcount_part_z(size,length) _SAL1_Source_(__out_bcount_part_z, (size,length), __out_bcount_part(size,length) __post __nullterminated) +#define __out_ecount_full_z(size) _SAL1_Source_(__out_ecount_full_z, (size), __out_ecount_full(size) __post __nullterminated) +#define __out_bcount_full_z(size) _SAL1_Source_(__out_bcount_full_z, (size), __out_bcount_full(size) __post __nullterminated) +#define __out_nz _SAL1_Source_(__out_nz, (), __post __valid __refparam) +#define __out_nz_opt _SAL1_Source_(__out_nz_opt, (), __post __valid __refparam __post_except_maybenull_) +#define __out_ecount_nz(size) _SAL1_Source_(__out_ecount_nz, (size), __ecount(size) __post __valid __refparam) +#define __out_bcount_nz(size) _SAL1_Source_(__out_bcount_nz, (size), __bcount(size) __post __valid __refparam) +#define __inout _SAL1_Source_(__inout, (), _Inout_) +#define __inout_ecount(size) _SAL1_Source_(__inout_ecount, (size), _Inout_updates_(size)) +#define __inout_bcount(size) _SAL1_Source_(__inout_bcount, (size), _Inout_updates_bytes_(size)) +#define __inout_ecount_part(size,length) _SAL1_Source_(__inout_ecount_part, (size,length), _Inout_updates_to_(size,length)) +#define __inout_bcount_part(size,length) _SAL1_Source_(__inout_bcount_part, (size,length), _Inout_updates_bytes_to_(size,length)) +#define 
__inout_ecount_full(size) _SAL1_Source_(__inout_ecount_full, (size), _Inout_updates_all_(size)) +#define __inout_bcount_full(size) _SAL1_Source_(__inout_bcount_full, (size), _Inout_updates_bytes_all_(size)) +#define __inout_z _SAL1_Source_(__inout_z, (), _Inout_z_) +#define __inout_ecount_z(size) _SAL1_Source_(__inout_ecount_z, (size), _Inout_updates_z_(size)) +#define __inout_bcount_z(size) _SAL1_Source_(__inout_bcount_z, (size), __inout_bcount(size) __pre __nullterminated __post __nullterminated) +#define __inout_nz _SAL1_Source_(__inout_nz, (), __inout) +#define __inout_ecount_nz(size) _SAL1_Source_(__inout_ecount_nz, (size), __inout_ecount(size)) +#define __inout_bcount_nz(size) _SAL1_Source_(__inout_bcount_nz, (size), __inout_bcount(size)) +#define __ecount_opt(size) _SAL1_Source_(__ecount_opt, (size), __ecount(size) __pre_except_maybenull) +#define __bcount_opt(size) _SAL1_Source_(__bcount_opt, (size), __bcount(size) __pre_except_maybenull) +#define __in_opt _SAL1_Source_(__in_opt, (), _In_opt_) +#define __in_ecount_opt(size) _SAL1_Source_(__in_ecount_opt, (size), _In_reads_opt_(size)) +#define __in_bcount_opt(size) _SAL1_Source_(__in_bcount_opt, (size), _In_reads_bytes_opt_(size)) +#define __in_z_opt _SAL1_Source_(__in_z_opt, (), _In_opt_z_) +#define __in_ecount_z_opt(size) _SAL1_Source_(__in_ecount_z_opt, (size), __in_ecount_opt(size) __pre __nullterminated) +#define __in_bcount_z_opt(size) _SAL1_Source_(__in_bcount_z_opt, (size), __in_bcount_opt(size) __pre __nullterminated) +#define __in_nz_opt _SAL1_Source_(__in_nz_opt, (), __in_opt) +#define __in_ecount_nz_opt(size) _SAL1_Source_(__in_ecount_nz_opt, (size), __in_ecount_opt(size)) +#define __in_bcount_nz_opt(size) _SAL1_Source_(__in_bcount_nz_opt, (size), __in_bcount_opt(size)) +#define __out_opt _SAL1_Source_(__out_opt, (), _Out_opt_) +#define __out_ecount_opt(size) _SAL1_Source_(__out_ecount_opt, (size), _Out_writes_opt_(size)) +#define __out_bcount_opt(size) _SAL1_Source_(__out_bcount_opt, (size), _Out_writes_bytes_opt_(size)) +#define __out_ecount_part_opt(size,length) _SAL1_Source_(__out_ecount_part_opt, (size,length), __out_ecount_part(size,length) __pre_except_maybenull) +#define __out_bcount_part_opt(size,length) _SAL1_Source_(__out_bcount_part_opt, (size,length), __out_bcount_part(size,length) __pre_except_maybenull) +#define __out_ecount_full_opt(size) _SAL1_Source_(__out_ecount_full_opt, (size), __out_ecount_full(size) __pre_except_maybenull) +#define __out_bcount_full_opt(size) _SAL1_Source_(__out_bcount_full_opt, (size), __out_bcount_full(size) __pre_except_maybenull) +#define __out_ecount_z_opt(size) _SAL1_Source_(__out_ecount_z_opt, (size), __out_ecount_opt(size) __post __nullterminated) +#define __out_bcount_z_opt(size) _SAL1_Source_(__out_bcount_z_opt, (size), __out_bcount_opt(size) __post __nullterminated) +#define __out_ecount_part_z_opt(size,length) _SAL1_Source_(__out_ecount_part_z_opt, (size,length), __out_ecount_part_opt(size,length) __post __nullterminated) +#define __out_bcount_part_z_opt(size,length) _SAL1_Source_(__out_bcount_part_z_opt, (size,length), __out_bcount_part_opt(size,length) __post __nullterminated) +#define __out_ecount_full_z_opt(size) _SAL1_Source_(__out_ecount_full_z_opt, (size), __out_ecount_full_opt(size) __post __nullterminated) +#define __out_bcount_full_z_opt(size) _SAL1_Source_(__out_bcount_full_z_opt, (size), __out_bcount_full_opt(size) __post __nullterminated) +#define __out_ecount_nz_opt(size) _SAL1_Source_(__out_ecount_nz_opt, (size), __out_ecount_opt(size) __post 
__nullterminated) +#define __out_bcount_nz_opt(size) _SAL1_Source_(__out_bcount_nz_opt, (size), __out_bcount_opt(size) __post __nullterminated) +#define __inout_opt _SAL1_Source_(__inout_opt, (), _Inout_opt_) +#define __inout_ecount_opt(size) _SAL1_Source_(__inout_ecount_opt, (size), __inout_ecount(size) __pre_except_maybenull) +#define __inout_bcount_opt(size) _SAL1_Source_(__inout_bcount_opt, (size), __inout_bcount(size) __pre_except_maybenull) +#define __inout_ecount_part_opt(size,length) _SAL1_Source_(__inout_ecount_part_opt, (size,length), __inout_ecount_part(size,length) __pre_except_maybenull) +#define __inout_bcount_part_opt(size,length) _SAL1_Source_(__inout_bcount_part_opt, (size,length), __inout_bcount_part(size,length) __pre_except_maybenull) +#define __inout_ecount_full_opt(size) _SAL1_Source_(__inout_ecount_full_opt, (size), __inout_ecount_full(size) __pre_except_maybenull) +#define __inout_bcount_full_opt(size) _SAL1_Source_(__inout_bcount_full_opt, (size), __inout_bcount_full(size) __pre_except_maybenull) +#define __inout_z_opt _SAL1_Source_(__inout_z_opt, (), __inout_opt __pre __nullterminated __post __nullterminated) +#define __inout_ecount_z_opt(size) _SAL1_Source_(__inout_ecount_z_opt, (size), __inout_ecount_opt(size) __pre __nullterminated __post __nullterminated) +#define __inout_ecount_z_opt(size) _SAL1_Source_(__inout_ecount_z_opt, (size), __inout_ecount_opt(size) __pre __nullterminated __post __nullterminated) +#define __inout_bcount_z_opt(size) _SAL1_Source_(__inout_bcount_z_opt, (size), __inout_bcount_opt(size)) +#define __inout_nz_opt _SAL1_Source_(__inout_nz_opt, (), __inout_opt) +#define __inout_ecount_nz_opt(size) _SAL1_Source_(__inout_ecount_nz_opt, (size), __inout_ecount_opt(size)) +#define __inout_bcount_nz_opt(size) _SAL1_Source_(__inout_bcount_nz_opt, (size), __inout_bcount_opt(size)) +#define __deref_ecount(size) _SAL1_Source_(__deref_ecount, (size), _Notref_ __ecount(1) __post _Notref_ __elem_readableTo(1) __post _Notref_ __deref _Notref_ __notnull __post __deref __elem_writableTo(size)) +#define __deref_bcount(size) _SAL1_Source_(__deref_bcount, (size), _Notref_ __ecount(1) __post _Notref_ __elem_readableTo(1) __post _Notref_ __deref _Notref_ __notnull __post __deref __byte_writableTo(size)) +#define __deref_out _SAL1_Source_(__deref_out, (), _Outptr_) +#define __deref_out_ecount(size) _SAL1_Source_(__deref_out_ecount, (size), _Outptr_result_buffer_(size)) +#define __deref_out_bcount(size) _SAL1_Source_(__deref_out_bcount, (size), _Outptr_result_bytebuffer_(size)) +#define __deref_out_ecount_part(size,length) _SAL1_Source_(__deref_out_ecount_part, (size,length), _Outptr_result_buffer_to_(size,length)) +#define __deref_out_bcount_part(size,length) _SAL1_Source_(__deref_out_bcount_part, (size,length), _Outptr_result_bytebuffer_to_(size,length)) +#define __deref_out_ecount_full(size) _SAL1_Source_(__deref_out_ecount_full, (size), __deref_out_ecount_part(size,size)) +#define __deref_out_bcount_full(size) _SAL1_Source_(__deref_out_bcount_full, (size), __deref_out_bcount_part(size,size)) +#define __deref_out_z _SAL1_Source_(__deref_out_z, (), _Outptr_result_z_) +#define __deref_out_ecount_z(size) _SAL1_Source_(__deref_out_ecount_z, (size), __deref_out_ecount(size) __post __deref __nullterminated) +#define __deref_out_bcount_z(size) _SAL1_Source_(__deref_out_bcount_z, (size), __deref_out_bcount(size) __post __deref __nullterminated) +#define __deref_out_nz _SAL1_Source_(__deref_out_nz, (), __deref_out) +#define __deref_out_ecount_nz(size) 
_SAL1_Source_(__deref_out_ecount_nz, (size), __deref_out_ecount(size)) +#define __deref_out_bcount_nz(size) _SAL1_Source_(__deref_out_bcount_nz, (size), __deref_out_ecount(size)) +#define __deref_inout _SAL1_Source_(__deref_inout, (), _Notref_ __notnull _Notref_ __elem_readableTo(1) __pre __deref __valid __post _Notref_ __deref __valid __refparam) +#define __deref_inout_z _SAL1_Source_(__deref_inout_z, (), __deref_inout __pre __deref __nullterminated __post _Notref_ __deref __nullterminated) +#define __deref_inout_ecount(size) _SAL1_Source_(__deref_inout_ecount, (size), __deref_inout __pre __deref __elem_writableTo(size) __post _Notref_ __deref __elem_writableTo(size)) +#define __deref_inout_bcount(size) _SAL1_Source_(__deref_inout_bcount, (size), __deref_inout __pre __deref __byte_writableTo(size) __post _Notref_ __deref __byte_writableTo(size)) +#define __deref_inout_ecount_part(size,length) _SAL1_Source_(__deref_inout_ecount_part, (size,length), __deref_inout_ecount(size) __pre __deref __elem_readableTo(length) __post __deref __elem_readableTo(length)) +#define __deref_inout_bcount_part(size,length) _SAL1_Source_(__deref_inout_bcount_part, (size,length), __deref_inout_bcount(size) __pre __deref __byte_readableTo(length) __post __deref __byte_readableTo(length)) +#define __deref_inout_ecount_full(size) _SAL1_Source_(__deref_inout_ecount_full, (size), __deref_inout_ecount_part(size,size)) +#define __deref_inout_bcount_full(size) _SAL1_Source_(__deref_inout_bcount_full, (size), __deref_inout_bcount_part(size,size)) +#define __deref_inout_ecount_z(size) _SAL1_Source_(__deref_inout_ecount_z, (size), __deref_inout_ecount(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_inout_bcount_z(size) _SAL1_Source_(__deref_inout_bcount_z, (size), __deref_inout_bcount(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_inout_nz _SAL1_Source_(__deref_inout_nz, (), __deref_inout) +#define __deref_inout_ecount_nz(size) _SAL1_Source_(__deref_inout_ecount_nz, (size), __deref_inout_ecount(size)) +#define __deref_inout_bcount_nz(size) _SAL1_Source_(__deref_inout_bcount_nz, (size), __deref_inout_ecount(size)) +#define __deref_ecount_opt(size) _SAL1_Source_(__deref_ecount_opt, (size), __deref_ecount(size) __post_deref_except_maybenull) +#define __deref_bcount_opt(size) _SAL1_Source_(__deref_bcount_opt, (size), __deref_bcount(size) __post_deref_except_maybenull) +#define __deref_out_opt _SAL1_Source_(__deref_out_opt, (), __deref_out __post_deref_except_maybenull) +#define __deref_out_ecount_opt(size) _SAL1_Source_(__deref_out_ecount_opt, (size), __deref_out_ecount(size) __post_deref_except_maybenull) +#define __deref_out_bcount_opt(size) _SAL1_Source_(__deref_out_bcount_opt, (size), __deref_out_bcount(size) __post_deref_except_maybenull) +#define __deref_out_ecount_part_opt(size,length) _SAL1_Source_(__deref_out_ecount_part_opt, (size,length), __deref_out_ecount_part(size,length) __post_deref_except_maybenull) +#define __deref_out_bcount_part_opt(size,length) _SAL1_Source_(__deref_out_bcount_part_opt, (size,length), __deref_out_bcount_part(size,length) __post_deref_except_maybenull) +#define __deref_out_ecount_full_opt(size) _SAL1_Source_(__deref_out_ecount_full_opt, (size), __deref_out_ecount_full(size) __post_deref_except_maybenull) +#define __deref_out_bcount_full_opt(size) _SAL1_Source_(__deref_out_bcount_full_opt, (size), __deref_out_bcount_full(size) __post_deref_except_maybenull) +#define __deref_out_z_opt 
_SAL1_Source_(__deref_out_z_opt, (), _Outptr_result_maybenull_z_) +#define __deref_out_ecount_z_opt(size) _SAL1_Source_(__deref_out_ecount_z_opt, (size), __deref_out_ecount_opt(size) __post __deref __nullterminated) +#define __deref_out_bcount_z_opt(size) _SAL1_Source_(__deref_out_bcount_z_opt, (size), __deref_out_bcount_opt(size) __post __deref __nullterminated) +#define __deref_out_nz_opt _SAL1_Source_(__deref_out_nz_opt, (), __deref_out_opt) +#define __deref_out_ecount_nz_opt(size) _SAL1_Source_(__deref_out_ecount_nz_opt, (size), __deref_out_ecount_opt(size)) +#define __deref_out_bcount_nz_opt(size) _SAL1_Source_(__deref_out_bcount_nz_opt, (size), __deref_out_bcount_opt(size)) +#define __deref_inout_opt _SAL1_Source_(__deref_inout_opt, (), __deref_inout __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_ecount_opt(size) _SAL1_Source_(__deref_inout_ecount_opt, (size), __deref_inout_ecount(size) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_bcount_opt(size) _SAL1_Source_(__deref_inout_bcount_opt, (size), __deref_inout_bcount(size) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_ecount_part_opt(size,length) _SAL1_Source_(__deref_inout_ecount_part_opt, (size,length), __deref_inout_ecount_part(size,length) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_bcount_part_opt(size,length) _SAL1_Source_(__deref_inout_bcount_part_opt, (size,length), __deref_inout_bcount_part(size,length) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_ecount_full_opt(size) _SAL1_Source_(__deref_inout_ecount_full_opt, (size), __deref_inout_ecount_full(size) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_bcount_full_opt(size) _SAL1_Source_(__deref_inout_bcount_full_opt, (size), __deref_inout_bcount_full(size) __pre_deref_except_maybenull __post_deref_except_maybenull) +#define __deref_inout_z_opt _SAL1_Source_(__deref_inout_z_opt, (), __deref_inout_opt __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_inout_ecount_z_opt(size) _SAL1_Source_(__deref_inout_ecount_z_opt, (size), __deref_inout_ecount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_inout_bcount_z_opt(size) _SAL1_Source_(__deref_inout_bcount_z_opt, (size), __deref_inout_bcount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_inout_nz_opt _SAL1_Source_(__deref_inout_nz_opt, (), __deref_inout_opt) +#define __deref_inout_ecount_nz_opt(size) _SAL1_Source_(__deref_inout_ecount_nz_opt, (size), __deref_inout_ecount_opt(size)) +#define __deref_inout_bcount_nz_opt(size) _SAL1_Source_(__deref_inout_bcount_nz_opt, (size), __deref_inout_bcount_opt(size)) +#define __deref_opt_ecount(size) _SAL1_Source_(__deref_opt_ecount, (size), __deref_ecount(size) __pre_except_maybenull) +#define __deref_opt_bcount(size) _SAL1_Source_(__deref_opt_bcount, (size), __deref_bcount(size) __pre_except_maybenull) +#define __deref_opt_out _SAL1_Source_(__deref_opt_out, (), _Outptr_opt_) +#define __deref_opt_out_z _SAL1_Source_(__deref_opt_out_z, (), _Outptr_opt_result_z_) +#define __deref_opt_out_ecount(size) _SAL1_Source_(__deref_opt_out_ecount, (size), __deref_out_ecount(size) __pre_except_maybenull) +#define __deref_opt_out_bcount(size) _SAL1_Source_(__deref_opt_out_bcount, (size), __deref_out_bcount(size) __pre_except_maybenull) +#define 
__deref_opt_out_ecount_part(size,length) _SAL1_Source_(__deref_opt_out_ecount_part, (size,length), __deref_out_ecount_part(size,length) __pre_except_maybenull) +#define __deref_opt_out_bcount_part(size,length) _SAL1_Source_(__deref_opt_out_bcount_part, (size,length), __deref_out_bcount_part(size,length) __pre_except_maybenull) +#define __deref_opt_out_ecount_full(size) _SAL1_Source_(__deref_opt_out_ecount_full, (size), __deref_out_ecount_full(size) __pre_except_maybenull) +#define __deref_opt_out_bcount_full(size) _SAL1_Source_(__deref_opt_out_bcount_full, (size), __deref_out_bcount_full(size) __pre_except_maybenull) +#define __deref_opt_inout _SAL1_Source_(__deref_opt_inout, (), _Inout_opt_) +#define __deref_opt_inout_ecount(size) _SAL1_Source_(__deref_opt_inout_ecount, (size), __deref_inout_ecount(size) __pre_except_maybenull) +#define __deref_opt_inout_bcount(size) _SAL1_Source_(__deref_opt_inout_bcount, (size), __deref_inout_bcount(size) __pre_except_maybenull) +#define __deref_opt_inout_ecount_part(size,length) _SAL1_Source_(__deref_opt_inout_ecount_part, (size,length), __deref_inout_ecount_part(size,length) __pre_except_maybenull) +#define __deref_opt_inout_bcount_part(size,length) _SAL1_Source_(__deref_opt_inout_bcount_part, (size,length), __deref_inout_bcount_part(size,length) __pre_except_maybenull) +#define __deref_opt_inout_ecount_full(size) _SAL1_Source_(__deref_opt_inout_ecount_full, (size), __deref_inout_ecount_full(size) __pre_except_maybenull) +#define __deref_opt_inout_bcount_full(size) _SAL1_Source_(__deref_opt_inout_bcount_full, (size), __deref_inout_bcount_full(size) __pre_except_maybenull) +#define __deref_opt_inout_z _SAL1_Source_(__deref_opt_inout_z, (), __deref_opt_inout __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_ecount_z(size) _SAL1_Source_(__deref_opt_inout_ecount_z, (size), __deref_opt_inout_ecount(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_bcount_z(size) _SAL1_Source_(__deref_opt_inout_bcount_z, (size), __deref_opt_inout_bcount(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_nz _SAL1_Source_(__deref_opt_inout_nz, (), __deref_opt_inout) +#define __deref_opt_inout_ecount_nz(size) _SAL1_Source_(__deref_opt_inout_ecount_nz, (size), __deref_opt_inout_ecount(size)) +#define __deref_opt_inout_bcount_nz(size) _SAL1_Source_(__deref_opt_inout_bcount_nz, (size), __deref_opt_inout_bcount(size)) +#define __deref_opt_ecount_opt(size) _SAL1_Source_(__deref_opt_ecount_opt, (size), __deref_ecount_opt(size) __pre_except_maybenull) +#define __deref_opt_bcount_opt(size) _SAL1_Source_(__deref_opt_bcount_opt, (size), __deref_bcount_opt(size) __pre_except_maybenull) +#define __deref_opt_out_opt _SAL1_Source_(__deref_opt_out_opt, (), _Outptr_opt_result_maybenull_) +#define __deref_opt_out_ecount_opt(size) _SAL1_Source_(__deref_opt_out_ecount_opt, (size), __deref_out_ecount_opt(size) __pre_except_maybenull) +#define __deref_opt_out_bcount_opt(size) _SAL1_Source_(__deref_opt_out_bcount_opt, (size), __deref_out_bcount_opt(size) __pre_except_maybenull) +#define __deref_opt_out_ecount_part_opt(size,length) _SAL1_Source_(__deref_opt_out_ecount_part_opt, (size,length), __deref_out_ecount_part_opt(size,length) __pre_except_maybenull) +#define __deref_opt_out_bcount_part_opt(size,length) _SAL1_Source_(__deref_opt_out_bcount_part_opt, (size,length), __deref_out_bcount_part_opt(size,length) __pre_except_maybenull) +#define 
__deref_opt_out_ecount_full_opt(size) _SAL1_Source_(__deref_opt_out_ecount_full_opt, (size), __deref_out_ecount_full_opt(size) __pre_except_maybenull) +#define __deref_opt_out_bcount_full_opt(size) _SAL1_Source_(__deref_opt_out_bcount_full_opt, (size), __deref_out_bcount_full_opt(size) __pre_except_maybenull) +#define __deref_opt_out_z_opt _SAL1_Source_(__deref_opt_out_z_opt, (), __post __deref __valid __refparam __pre_except_maybenull __pre_deref_except_maybenull __post_deref_except_maybenull __post __deref __nullterminated) +#define __deref_opt_out_ecount_z_opt(size) _SAL1_Source_(__deref_opt_out_ecount_z_opt, (size), __deref_opt_out_ecount_opt(size) __post __deref __nullterminated) +#define __deref_opt_out_bcount_z_opt(size) _SAL1_Source_(__deref_opt_out_bcount_z_opt, (size), __deref_opt_out_bcount_opt(size) __post __deref __nullterminated) +#define __deref_opt_out_nz_opt _SAL1_Source_(__deref_opt_out_nz_opt, (), __deref_opt_out_opt) +#define __deref_opt_out_ecount_nz_opt(size) _SAL1_Source_(__deref_opt_out_ecount_nz_opt, (size), __deref_opt_out_ecount_opt(size)) +#define __deref_opt_out_bcount_nz_opt(size) _SAL1_Source_(__deref_opt_out_bcount_nz_opt, (size), __deref_opt_out_bcount_opt(size)) +#define __deref_opt_inout_opt _SAL1_Source_(__deref_opt_inout_opt, (), __deref_inout_opt __pre_except_maybenull) +#define __deref_opt_inout_ecount_opt(size) _SAL1_Source_(__deref_opt_inout_ecount_opt, (size), __deref_inout_ecount_opt(size) __pre_except_maybenull) +#define __deref_opt_inout_bcount_opt(size) _SAL1_Source_(__deref_opt_inout_bcount_opt, (size), __deref_inout_bcount_opt(size) __pre_except_maybenull) +#define __deref_opt_inout_ecount_part_opt(size,length) _SAL1_Source_(__deref_opt_inout_ecount_part_opt, (size,length), __deref_inout_ecount_part_opt(size,length) __pre_except_maybenull) +#define __deref_opt_inout_bcount_part_opt(size,length) _SAL1_Source_(__deref_opt_inout_bcount_part_opt, (size,length), __deref_inout_bcount_part_opt(size,length) __pre_except_maybenull) +#define __deref_opt_inout_ecount_full_opt(size) _SAL1_Source_(__deref_opt_inout_ecount_full_opt, (size), __deref_inout_ecount_full_opt(size) __pre_except_maybenull) +#define __deref_opt_inout_bcount_full_opt(size) _SAL1_Source_(__deref_opt_inout_bcount_full_opt, (size), __deref_inout_bcount_full_opt(size) __pre_except_maybenull) +#define __deref_opt_inout_z_opt _SAL1_Source_(__deref_opt_inout_z_opt, (), __deref_opt_inout_opt __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_ecount_z_opt(size) _SAL1_Source_(__deref_opt_inout_ecount_z_opt, (size), __deref_opt_inout_ecount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_bcount_z_opt(size) _SAL1_Source_(__deref_opt_inout_bcount_z_opt, (size), __deref_opt_inout_bcount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated) +#define __deref_opt_inout_nz_opt _SAL1_Source_(__deref_opt_inout_nz_opt, (), __deref_opt_inout_opt) +#define __deref_opt_inout_ecount_nz_opt(size) _SAL1_Source_(__deref_opt_inout_ecount_nz_opt, (size), __deref_opt_inout_ecount_opt(size)) +#define __deref_opt_inout_bcount_nz_opt(size) _SAL1_Source_(__deref_opt_inout_bcount_nz_opt, (size), __deref_opt_inout_bcount_opt(size)) + +/* +------------------------------------------------------------------------------- +Advanced Annotation Definitions + +Any of these may be used to directly annotate functions, and may be used in +combination with each other or with regular buffer macros. 
For an explanation +of each annotation, see the advanced annotations section. +------------------------------------------------------------------------------- +*/ + +#define __success(expr) _Success_(expr) +#define __nullterminated _Null_terminated_ +#define __nullnullterminated +#define __clr_reserved _SAL1_Source_(__reserved, (), _Reserved_) +#define __checkReturn _SAL1_Source_(__checkReturn, (), _Check_return_) +#define __typefix(ctype) _SAL1_Source_(__typefix, (ctype), __inner_typefix(ctype)) +#define __override __inner_override +#define __callback __inner_callback +#define __format_string _Printf_format_string_ +#define __blocksOn(resource) __inner_blocksOn(resource) +#define __control_entrypoint(category) __inner_control_entrypoint(category) +#define __data_entrypoint(category) __inner_data_entrypoint(category) +#define __useHeader _Use_decl_anno_impl_ +#define __on_failure(annotes) _On_failure_impl_(annotes _SAL_nop_impl_) + +#ifndef __fallthrough // [ + __inner_fallthrough_dec + #define __fallthrough __inner_fallthrough +#endif // ] + +#ifndef __analysis_assume // [ +#ifdef _PREFAST_ // [ +#define __analysis_assume(expr) __assume(expr) +#else // ][ +#define __analysis_assume(expr) +#endif // ] +#endif // ] + +#ifndef _Analysis_assume_ // [ +#ifdef _PREFAST_ // [ +#define _Analysis_assume_(expr) __assume(expr) +#else // ][ +#define _Analysis_assume_(expr) +#endif // ] +#endif // ] + +#define _Analysis_noreturn_ _SAL2_Source_(_Analysis_noreturn_, (), _SA_annotes0(SAL_terminates)) + +#ifdef _PREFAST_ // [ +__inline __nothrow +void __AnalysisAssumeNullterminated(_Post_ __nullterminated void *p); + +#define _Analysis_assume_nullterminated_(x) __AnalysisAssumeNullterminated(x) +#else // ][ +#define _Analysis_assume_nullterminated_(x) +#endif // ] + +// +// Set the analysis mode (global flags to analysis). +// They take effect at the point of declaration; use at global scope +// as a declaration. +// + +// Synthesize a unique symbol. +#define ___MKID(x, y) x ## y +#define __MKID(x, y) ___MKID(x, y) +#define __GENSYM(x) __MKID(x, __COUNTER__) + +__ANNOTATION(SAL_analysisMode(__AuToQuOtE __In_impl_ char *mode);) + +#define _Analysis_mode_impl_(mode) _SA_annotes1(SAL_analysisMode, #mode) + +#define _Analysis_mode_(mode) \ + typedef _Analysis_mode_impl_(mode) int \ + __GENSYM(__prefast_analysis_mode_flag); + +// The following are predefined: +// _Analysis_operator_new_throw_ (operator new throws) +// _Analysis_operator_new_null_ (operator new returns null) +// _Analysis_operator_new_never_fails_ (operator new never fails) +// + +// Function class annotations. +__ANNOTATION(SAL_functionClassNew(__In_impl_ char*);) +__PRIMOP(int, _In_function_class_(__In_impl_ char*);) +#define _In_function_class_(x) _In_function_class_(#x) + +#define _Function_class_(x) _SA_annotes1(SAL_functionClassNew, #x) + +/* + * interlocked operand used in interlocked instructions + */ +//#define _Interlocked_operand_ _Pre_ _SA_annotes0(SAL_interlocked) + +#define _Enum_is_bitflag_ _SA_annotes0(SAL_enumIsBitflag) +#define _Strict_type_match_ _SA_annotes0(SAL_strictType2) + +#define _Maybe_raises_SEH_exception_ _Pre_ _SA_annotes1(SAL_inTry,__yes) +#define _Raises_SEH_exception_ _Group_(_Maybe_raises_SEH_exception_ _Analysis_noreturn_) + +#ifdef __cplusplus // [ +} +#endif // ] + +// Rotor doesn't need concurrency sal. 
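+// Illustrative sketch (hypothetical function and parameter names) combining
+// the advanced annotations defined above:
+//
+//   __checkReturn __success(return != FALSE)
+//   BOOL FormatWidgetLabel(
+//       __out_ecount(cchBuf) LPWSTR pszBuf,  -- guaranteed NULL-terminated only when TRUE is returned
+//       UINT cchBuf,
+//       __format_string LPCWSTR pszFormat,   -- printf-style format string
+//       ...);
+//
+//   switch (opt) {
+//   case OPT_VERBOSE:
+//       verbose = TRUE;
+//       __fallthrough;                       -- intentional fall-through, not a forgotten break
+//   case OPT_DEFAULT:
+//       ApplyDefaults();
+//       break;
+//   }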
+// #include + +#define _Interlocked_operand_ diff --git a/src/coreclr/src/nativeaot/Runtime/unix/specstrings.h b/src/coreclr/src/nativeaot/Runtime/unix/specstrings.h new file mode 100644 index 0000000000000..cbab8237961f1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/specstrings.h @@ -0,0 +1,535 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + + +// +#ifndef SPECSTRINGS_H +#define SPECSTRINGS_H + +/*** +*specstrings.h - support for markers for documenting the semantics of APIs +* + +* +* [Public] +****/ + +/************************************************************************* +* See specstrings_strict.h for documentation of all user visible macros. +*************************************************************************/ + +#if _MSC_VER +#pragma once +#endif + +#if !defined(_SAL_VERSION_SAL2) + + #if defined(__BUILDMACHINE__) || defined(_USE_SAL2_ONLY) + #define _SAL_VERSION_SAL2(_A) SAL_2_Clean_Violation_using ## _A + #else + #define _SAL_VERSION_SAL2(_A) + #endif + + #ifdef _USE_SAL2_ONLY + #define _SAL2_STRICT + #define _SAL_VERSION_CHECK(_A) _SAL_VERSION_SAL2(_A) + #else + #define _SAL_VERSION_CHECK(_A) + #endif + + #ifndef SAL_VERSION_CHECK + #define SAL_VERSION_CHECK(_A) _SAL_VERSION_CHECK(_A) + #define SAL_VERSION_SAL2(_A) _SAL_VERSION_SAL2(_A) + #endif + +#endif + +#include + +#ifndef __SAL_H_FULL_VER +#define __SAL_H_FULL_VER 140050727 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* version specific fixes to bring sal.h upto date */ +#if __SAL_H_FULL_VER <= 140050727 + +#if !defined(__midl) && defined(_PREFAST_) && _MSC_VER >= 1000 // [ + +/* Missing from RTM sal.h */ +#define __inner_bound _SA_annotes0(SAL_bound) +#define __inner_range(lb,ub) _SA_annotes2(SAL_range,lb,ub) +#define __inner_assume_bound_dec __inline __nothrow void __AssumeBoundInt(_Post_ __inner_bound int i) {i;} +#define __inner_assume_bound(i) __AssumeBoundInt(i); +#define __inner_allocator _SA_annotes0(SAL_allocator) + +#define __static_context(ctx, annotes) \ + _SA_annotes1(SAL_context,ctx) _Group_(__nop_impl(annotes)) + +#define __failure(x) __static_context(SAL_return_convention, \ + _SA_annotes1(SAL_failure,x)) + +__ANNOTATION(SAL_valueUndefined()); +#define __valueUndefined _SA_annotes0(SAL_valueUndefined) + +enum __SAL_failureKind{__failureUnspecified = 0, __failureUndefined = 1}; + +__ANNOTATION(SAL_failureDefault(enum __SAL_failureKind)); +#define __failureDefault(kind) __static_context(SAL_return_convention, \ + _SA_annotes1(SAL_failureDefault,kind)) + +#else // ][ + +#define __inner_bound +#define __inner_range(lb,ub) +#define __inner_assume_bound_dec +#define __inner_assume_bound(i) +#define __inner_allocator + +#define __static_context(ctx, annotes) +#define __failure(x) +#define __valueUndefined +#define __failureDefault(x) + +#endif // ] + +#define __xcount(size) __notnull __inexpressible_writableTo(size) +#define __in_xcount(size) __in _Pre_ __inexpressible_readableTo(size) +#define __out_xcount(size) __xcount(size) _Post_ __valid __refparam +#define __out_xcount_part(size,length) __out_xcount(size) _Post_ __inexpressible_readableTo(length) +#define __out_xcount_full(size) __out_xcount_part(size,size) +#define __inout_xcount(size) __out_xcount(size) _Pre_ __valid +#define __inout_xcount_part(size,length) __out_xcount_part(size,length) _Pre_ __valid _Pre_ __inexpressible_readableTo(length) +#define __inout_xcount_full(size) __inout_xcount_part(size,size) +#define 
__xcount_opt(size) __xcount(size) __exceptthat __maybenull +#define __in_xcount_opt(size) __in_xcount(size) __exceptthat __maybenull +#define __out_xcount_opt(size) __out_xcount(size) __exceptthat __maybenull +#define __out_xcount_part_opt(size,length) __out_xcount_part(size,length) __exceptthat __maybenull +#define __out_xcount_full_opt(size) __out_xcount_full(size) __exceptthat __maybenull +#define __inout_xcount_opt(size) __inout_xcount(size) __exceptthat __maybenull +#define __inout_xcount_part_opt(size,length) __inout_xcount_part(size,length) __exceptthat __maybenull +#define __inout_xcount_full_opt(size) __inout_xcount_full(size) __exceptthat __maybenull +#define __deref_xcount(size) __ecount(1) _Post_ __elem_readableTo(1) _Post_ __deref __notnull _Post_ __deref __inexpressible_writableTo(size) +#define __deref_in __in _Pre_ __deref __deref __readonly +#define __deref_in_ecount(size) __deref_in _Pre_ __deref __elem_readableTo(size) +#define __deref_in_bcount(size) __deref_in _Pre_ __deref __byte_readableTo(size) +#define __deref_in_xcount(size) __deref_in _Pre_ __deref __inexpressible_readableTo(size) +#define __deref_out_xcount(size) __deref_xcount(size) _Post_ __deref __valid __refparam +#define __deref_out_xcount_part(size,length) __deref_out_xcount(size) _Post_ __deref __inexpressible_readableTo(length) +#define __deref_out_xcount_full(size) __deref_out_xcount_part(size,size) +#define __deref_out_xcount(size) __deref_xcount(size) _Post_ __deref __valid __refparam +#define __inout_xcount_opt(size) __inout_xcount(size) __exceptthat __maybenull +#define __inout_xcount_part_opt(size,length) __inout_xcount_part(size,length) __exceptthat __maybenull +#define __inout_xcount_full_opt(size) __inout_xcount_full(size) __exceptthat __maybenull +#define __deref_xcount(size) __ecount(1) _Post_ __elem_readableTo(1) _Post_ __deref __notnull _Post_ __deref __inexpressible_writableTo(size) +#define __deref_in __in _Pre_ __deref __deref __readonly +#define __deref_in_ecount(size) __deref_in _Pre_ __deref __elem_readableTo(size) +#define __deref_in_bcount(size) __deref_in _Pre_ __deref __byte_readableTo(size) +#define __deref_in_xcount(size) __deref_in _Pre_ __deref __inexpressible_readableTo(size) +#define __deref_out_xcount(size) __deref_xcount(size) _Post_ __deref __valid __refparam +#define __deref_out_xcount_part(size,length) __deref_out_xcount(size) _Post_ __deref __inexpressible_readableTo(length) +#define __deref_out_xcount_full(size) __deref_out_xcount_part(size,size) +#define __deref_out_xcount(size) __deref_xcount(size) _Post_ __deref __valid __refparam +#define __deref_inout_xcount(size) __deref_inout _Pre_ __deref __inexpressible_writableTo(size) _Post_ __deref __inexpressible_writableTo(size) +#define __deref_inout_xcount_part(size,length) __deref_inout_xcount(size) _Pre_ __deref __inexpressible_readableTo(length) _Post_ __deref __inexpressible_readableTo(length) +#define __deref_inout_xcount_full(size) __deref_inout_xcount_part(size,size) +#define __deref_xcount_opt(size) __deref_xcount(size) _Post_ __deref __exceptthat __maybenull +#define __deref_in_opt __deref_in _Pre_ __deref __exceptthat __maybenull +#define __deref_in_opt_out __deref_inout _Pre_ __deref __exceptthat __maybenull _Post_ __deref __notnull +#define __deref_in_ecount_opt(size) __deref_in_ecount(size) _Pre_ __deref __exceptthat __maybenull +#define __deref_in_bcount_opt(size) __deref_in_bcount(size) _Pre_ __deref __exceptthat __maybenull +#define __deref_in_xcount_opt(size) __deref_in_xcount(size) _Pre_ __deref 
__exceptthat __maybenull +#define __deref_out_xcount_opt(size) __deref_out_xcount(size) _Post_ __deref __exceptthat __maybenull +#define __deref_out_xcount_part_opt(size,length) __deref_out_xcount_part(size,length) _Post_ __deref __exceptthat __maybenull +#define __deref_out_xcount_full_opt(size) __deref_out_xcount_full(size) _Post_ __deref __exceptthat __maybenull +#define __deref_inout_xcount_opt(size) __deref_inout_xcount(size) _Pre_ __deref __exceptthat __maybenull _Post_ __deref __exceptthat __maybenull +#define __deref_inout_xcount_part_opt(size,length) __deref_inout_xcount_part(size,length) _Pre_ __deref __exceptthat __maybenull _Post_ __deref __exceptthat __maybenull +#define __deref_inout_xcount_full_opt(size) __deref_inout_xcount_full(size) _Pre_ __deref __exceptthat __maybenull _Post_ __deref __exceptthat __maybenull +#define __deref_opt_xcount(size) __deref_xcount(size) __exceptthat __maybenull +#define __deref_opt_in __deref_in __exceptthat __maybenull +#define __deref_opt_in_ecount(size) __deref_in_ecount(size) __exceptthat __maybenull +#define __deref_opt_in_bcount(size) __deref_in_bcount(size) __exceptthat __maybenull +#define __deref_opt_in_xcount(size) __deref_in_xcount(size) __exceptthat __maybenull +#define __deref_opt_out_xcount(size) __deref_out_xcount(size) __exceptthat __maybenull +#define __deref_opt_out_xcount_part(size,length) __deref_out_xcount_part(size,length) __exceptthat __maybenull +#define __deref_opt_out_xcount_full(size) __deref_out_xcount_full(size) __exceptthat __maybenull +#define __deref_opt_inout_xcount(size) __deref_inout_xcount(size) __exceptthat __maybenull +#define __deref_opt_inout_xcount_part(size,length) __deref_inout_xcount_part(size,length) __exceptthat __maybenull +#define __deref_opt_inout_xcount_full(size) __deref_inout_xcount_full(size) __exceptthat __maybenull +#define __deref_opt_xcount_opt(size) __deref_xcount_opt(size) __exceptthat __maybenull +#define __deref_opt_in_opt __deref_in_opt __exceptthat __maybenull +#define __deref_opt_in_ecount_opt(size) __deref_in_ecount_opt(size) __exceptthat __maybenull +#define __deref_opt_in_bcount_opt(size) __deref_in_bcount_opt(size) __exceptthat __maybenull +#define __deref_opt_in_xcount_opt(size) __deref_in_xcount_opt(size) __exceptthat __maybenull +#define __deref_opt_out_xcount_opt(size) __deref_out_xcount_opt(size) __exceptthat __maybenull +#define __deref_opt_out_xcount_part_opt(size,length) __deref_out_xcount_part_opt(size,length) __exceptthat __maybenull +#define __deref_opt_out_xcount_full_opt(size) __deref_out_xcount_full_opt(size) __exceptthat __maybenull +#define __deref_opt_inout_xcount_opt(size) __deref_inout_xcount_opt(size) __exceptthat __maybenull +#define __deref_opt_inout_xcount_part_opt(size,length) __deref_inout_xcount_part_opt(size,length) __exceptthat __maybenull +#define __deref_opt_inout_xcount_full_opt(size) __deref_inout_xcount_full_opt(size) __exceptthat __maybenull + +#define __deref_in_ecount_iterator(size, incr) __inout _Pre_ __deref __elem_readableTo(size) __deref_out_range(==, _Old_(*_Curr_) + incr) +#define __deref_out_ecount_iterator(size, incr) __inout _Pre_ __deref __elem_writableTo(size) __deref_out_range(==, _Old_(*_Curr_) + incr) +#define __deref_inout_ecount_iterator(size, incr) __inout _Pre_ __deref __elem_readableTo(size) _Pre_ __deref __elem_writableTo(size) __deref_out_range(==, _Old_(*_Curr_) + incr) + +#define __post_bcount(size) _Post_ __byte_writableTo(size) +#define __post_ecount(size) _Post_ __elem_writableTo(size) + +#define 
__deref_realloc_bcount(insize, outsize) __inout _Pre_ __deref __byte_readableTo(insize) _Post_ __deref __byte_writableTo(outsize)
+
+/* __in_ecount_or_z(c) specifies semantics like strncmp, where a string
+ * parameter is either null terminated, or valid up to c elements.
+ */
+#define __in_ecount_or_z(c) _When_(_String_length_(_Curr_) < (c), __in_z) \
+ _When_(_String_length_(_Curr_) >= (c), __in_ecount(c))
+
+
+/* Provide default definition to be overridden when needed */
+#define __post_nullnullterminated
+
+/* Must protect redefinitions of macros to work around rc.exe issues. */
+#ifndef RC_INVOKED
+
+#undef __nullnullterminated
+#define __nullnullterminated __inexpressible_readableTo("string terminated by two nulls") __nullterminated
+
+#undef __post_nullnullterminated
+#define __post_nullnullterminated _Post_ __inexpressible_readableTo("string terminated by two nulls") _Post_ __nullterminated
+
+#endif
+#endif //__SAL_H_FULL_VER <= 140050727
+
+/************************************************************************
+ New extensions to sal.h follow here.
+*************************************************************************/
+
+#if (_MSC_VER >= 1000) && !defined(__midl) && defined(_PREFAST_)
+
+#define __file_parser(typ) _SA_annotes2(SAL_file_parser,"function",typ)
+#define __file_parser_class(typ) _SA_annotes2(SAL_file_parser,"class",typ)
+#define __file_parser_library(typ) extern int _SA_annotes2(SAL_file_parser, "library", typ) __iSALFileParserLibrary##typ;
+#define __source_code_content(typ) extern int _SA_annotes1(SAL_source_code_content, typ) __iSAL_Source_Code_Content##typ;
+#define __class_code_content(typ) _SA_annotes1(SAL_class_code_content, typ)
+#define __analysis_assert(e) __assume(e)
+#define __analysis_hint(hint) _SA_annotes1(SAL_analysisHint, hint)
+// For "breakpoint": doesn't return as far as analysis is concerned.
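+// Illustrative sketch for __in_ecount_or_z above (hypothetical function and
+// parameter names): the strncmp-style contract where each string argument is
+// either NULL-terminated or valid for at least cchMax elements.
+//
+//   int CompareKeys(
+//       __in_ecount_or_z(cchMax) const char *pszLeft,
+//       __in_ecount_or_z(cchMax) const char *pszRight,
+//       size_t cchMax);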
+#define __analysis_noreturn __declspec(noreturn) +/* Internal defintions */ +#define __inner_data_source(src_raw) _SA_annotes1(SAL_untrusted_data_source,src_raw) +#define __inner_this_data_source(src_raw) _SA_annotes1(SAL_untrusted_data_source_this,src_raw) +#define __inner_out_validated(typ_raw) _Post_ _SA_annotes1(SAL_validated,typ_raw) +#define __inner_this_out_validated(typ_raw) _SA_annotes1(SAL_validated_this,typ_raw) +#define __inner_assume_validated_dec __inline __nothrow void __AssumeValidated(__inner_out_validated("BY_DESIGN") const void *p) {p;} +#define __inner_assume_validated(p) __AssumeValidated(p) +#define __inner_transfer(formal) _SA_annotes1(SAL_transfer_adt_property_from,formal) +#define __inner_encoded _SA_annotes0(SAL_encoded) + +#if defined(_MSC_EXTENSIONS) || defined(_PREFAST_) || defined(OACR) +#define __inner_adt_prop(adt,prop) _SA_annotes2(SAL_adt, adt,prop) +#define __inner_adt_add_prop(adt,prop) _SA_annotes2(SAL_add_adt_property,adt,prop) +#define __inner_adt_remove_prop(adt,prop) _SA_annotes2(SAL_remove_adt_property,adt,prop) +#define __inner_adt_transfer_prop(arg) _SA_annotes1(SAL_transfer_adt_property_from,arg) +#define __inner_adt_type_props(typ) _SA_annotes1(SAL_post_type,typ) +#define __inner_volatile _SA_annotes0(SAL_volatile) +#define __inner_nonvolatile _SA_annotes0(SAL_nonvolatile) +#define __inner_possibly_notnullterminated _SA_annotes1(SAL_nullTerminated,__maybe) +#define __inner_analysis_assume_nullterminated_dec __inline __nothrow void __AnalysisAssumeNullterminated(_Post_ __nullterminated void *p) {*(char*)p=0;} +#define __inner_analysis_assume_nullterminated(x) __AnalysisAssumeNullterminated(x); +#endif + +#else + +#define __file_parser(typ) +#define __file_parser_class(typ) +#define __file_parser_library(typ) +#define __source_code_content(typ) +#define __class_code_content(typ) +#define __analysis_assert(e) +#define __analysis_hint(hint) +#define __analysis_noreturn +/* Internal defintions */ +#define __inner_data_source(src_raw) +#define __inner_this_data_source(src_raw) +#define __inner_out_validated(typ_raw) +#define __inner_this_out_validated(typ_raw) +#define __inner_assume_validated_dec +#define __inner_assume_validated(p) +#define __inner_transfer(formal) +#define __inner_encoded +#define __inner_adt_prop(adt,prop) +#define __inner_adt_add_prop(adt,prop) +#define __inner_adt_remove_prop(adt,prop) +#define __inner_adt_transfer_prop(arg) +#define __inner_adt_type_props(typ) +#define __inner_volatile +#define __inner_nonvolatile +#define __inner_possibly_notnullterminated +#define __inner_analysis_assume_nullterminated_dec +#define __inner_analysis_assume_nullterminated(x) + +#endif // #if (_MSC_VER >= 1000) && !defined(__midl) && defined(_PREFAST_) + +#define __field_ecount(size) __notnull __elem_writableTo(size) +#define __field_bcount(size) __notnull __byte_writableTo(size) +#define __field_xcount(size) __notnull __inexpressible_writableTo(size) + +#define __field_ecount_opt(size) __maybenull __elem_writableTo(size) +#define __field_bcount_opt(size) __maybenull __byte_writableTo(size) +#define __field_xcount_opt(size) __maybenull __inexpressible_writableTo(size) + +#define __field_ecount_part(size,init) __notnull __elem_writableTo(size) __elem_readableTo(init) +#define __field_bcount_part(size,init) __notnull __byte_writableTo(size) __byte_readableTo(init) +#define __field_xcount_part(size,init) __notnull __inexpressible_writableTo(size) __inexpressible_readableTo(init) + +#define __field_ecount_part_opt(size,init) __maybenull 
__elem_writableTo(size) __elem_readableTo(init) +#define __field_bcount_part_opt(size,init) __maybenull __byte_writableTo(size) __byte_readableTo(init) +#define __field_xcount_part_opt(size,init) __maybenull __inexpressible_writableTo(size) __inexpressible_readableTo(init) + +#define __field_ecount_full(size) __field_ecount_part(size,size) +#define __field_bcount_full(size) __field_bcount_part(size,size) +#define __field_xcount_full(size) __field_xcount_part(size,size) + +#define __field_ecount_full_opt(size) __field_ecount_part_opt(size,size) +#define __field_bcount_full_opt(size) __field_bcount_part_opt(size,size) +#define __field_xcount_full_opt(size) __field_xcount_part_opt(size,size) + +#define __field_nullterminated __nullterminated + +#define __struct_bcount(size) __byte_writableTo(size) +#define __struct_xcount(size) __inexpressible_writableTo(size) + +#define __out_awcount(expr,size) _Pre_ __notnull \ + __byte_writableTo((expr) ? (size) : (size) * 2) \ + _Post_ __valid __refparam +#define __in_awcount(expr,size) _Pre_ __valid \ + _Pre_ _Notref_ __deref __readonly \ + __byte_readableTo((expr) ? (size) : (size) * 2) +#define __post_invalid _Post_ __notvalid +/* integer related macros */ +#define __allocator __inner_allocator +#ifndef PAL_STDCPP_COMPAT +#define __deallocate(kind) _Pre_ __notnull __post_invalid +#define __deallocate_opt(kind) _Pre_ __maybenull __post_invalid +#endif +#define __bound __inner_bound +#define __range(lb,ub) __inner_range(lb,ub) +#define __in_bound _Pre_ __inner_bound +#define __out_bound _Post_ __inner_bound +#define __deref_out_bound _Post_ __deref __inner_bound +#define __in_range(lb,ub) _Pre_ __inner_range(lb,ub) +#define __out_range(lb,ub) _Post_ __inner_range(lb,ub) +#define __deref_in_range(lb,ub) _Pre_ __deref __inner_range(lb,ub) +#define __deref_out_range(lb,ub) _Post_ __deref __inner_range(lb,ub) +#define __deref_inout_range(lb,ub) __deref_in_range(lb,ub) __deref_out_range(lb,ub) +#define __field_range(lb,ub) __range(lb,ub) +#define __field_data_source(src_sym) __inner_data_source(#src_sym) + +#define __range_max(a,b) __range(==, a > b ? a : b) +#define __range_min(a,b) __range(==, a < b ? 
a : b) + + +/* Penetration review macros */ +#define __in_data_source(src_sym) _Pre_ __inner_data_source(#src_sym) +#define __out_data_source(src_sym) _Post_ __inner_data_source(#src_sym) +#define __out_validated(typ_sym) __inner_out_validated(#typ_sym) +#define __this_out_data_source(src_sym) __inner_this_data_source(#src_sym) +#define __this_out_validated(typ_sym) __inner_this_out_validated(#typ_sym) +#define __transfer(formal) _Post_ __inner_transfer(formal) +#define __rpc_entry __inner_control_entrypoint(RPC) +#define __kernel_entry __inner_control_entrypoint(UserToKernel) +#define __gdi_entry __inner_control_entrypoint(GDI) +#define __encoded_pointer __inner_encoded +#define __encoded_array __inner_encoded +#define __field_encoded_pointer __inner_encoded +#define __field_encoded_array __inner_encoded +#if defined(_MSC_EXTENSIONS) || defined(_PREFAST_) || defined(OACR) +#define __type_has_adt_prop(adt,prop) __inner_adt_prop(adt,prop) +#define __out_has_adt_prop(adt,prop) _Post_ __inner_adt_add_prop(adt,prop) +#define __out_not_has_adt_prop(adt,prop) _Post_ __inner_adt_remove_prop(adt,prop) +#define __out_transfer_adt_prop(arg) _Post_ __inner_adt_transfer_prop(arg) +#define __out_has_type_adt_props(typ) _Post_ __inner_adt_type_props(typ) + +/* useful PFD related macros */ +#define __possibly_notnullterminated __inner_possibly_notnullterminated + +/* Windows Internal */ +#define __volatile __inner_volatile +#define __nonvolatile __inner_nonvolatile +#else +#define __out_has_type_adt_props(typ) /* nothing */ +#endif +#define __deref_volatile __deref __volatile +#define __deref_nonvolatile __deref __nonvolatile + +/* declare stub functions for macros */ +__inner_assume_validated_dec +__inner_assume_bound_dec +__inner_analysis_assume_nullterminated_dec +#define __analysis_assume_nullterminated(x) __inner_analysis_assume_nullterminated(x) +#define __assume_validated(p) __inner_assume_validated(p) +#define __assume_bound(i) __inner_assume_bound(i) + + +/************************************************************************** +* SAL 2 extensions for Windows-specific APIs. +***************************************************************************/ + +// Annotation for parameters that are not used in any way by the function. +// Unlike _Reserved_, an _Unreferenced_parameter_ pointer need not be NULL. +#ifndef _Unreferenced_parameter_ +#define _Unreferenced_parameter_ _Const_ +#endif + +// Pointer parameters that are freed by the function, and thus the pointed-to +// memory should not be used after return. +#ifndef _Frees_ptr_ +#define _Frees_ptr_ _Pre_notnull_ _Post_ptr_invalid_ +#endif +#ifndef _Frees_ptr_opt_ +#define _Frees_ptr_opt_ _Pre_maybenull_ _Post_ptr_invalid_ +#endif + +// NLS APIs allow strings to be specified either by an element count or +// null termination. Unlike _In_reads_or_z_, this is not whichever comes +// first, but based on whether the size is negative or not. +#define _In_NLS_string_(size) _When_((size) < 0, _In_z_) \ + _When_((size) >= 0, _In_reads_(size)) + + +// Minifilter CompletionContext parameters on the pre-operation callback +// default to NULL. For return type FLT_PREOP_SUCCESS_WITH_CALLBACK or +// FLT_PREOP_SYNCHRONIZE, it may be set to NULL or a valid pointer. For all +// other returns, it must be NULL. 
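+// Illustrative sketch for the SAL 2 extensions above (hypothetical function
+// and parameter names):
+//
+//   void ReleaseBlock(_Frees_ptr_ void *pBlock);          -- callee frees pBlock; caller must not
+//                                                            use it after the call returns
+//
+//   BOOL RegisterName(
+//       _In_NLS_string_(cchName) LPCWSTR pszName,         -- NULL-terminated when cchName < 0,
+//       int cchName);                                     -- otherwise exactly cchName elements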
+#define _Flt_CompletionContext_Outptr_ \ + _Outptr_result_maybenull_ _Pre_valid_ \ + _At_(*_Curr_, _Pre_null_ \ + _When_(return != FLT_PREOP_SUCCESS_WITH_CALLBACK && return != FLT_PREOP_SYNCHRONIZE, _Post_null_)) + +// Minifilter ConnectionCookie parameters on the port connect notify callback +// default to NULL. On successful return, it may be set to NULL or non-NULL, +// but it must be NULL on failure. +#define _Flt_ConnectionCookie_Outptr_ \ + _Outptr_result_maybenull_ _Pre_valid_ \ + _At_(*_Curr_, _Pre_null_ _On_failure_(_Post_null_)) + + +// +// A common pattern is to pass an "_Inout_ PCHAR* ppBuf" of size "_Inout_ DWORD* pSize" +// to a function that writes to **pBuf, incrementing *ppBuf to point to one +// past the last written byte. Thus the length of the write is +// (*ppBuf - Old(*ppBuf)). The size of the remaining unwritten capacity +// is written to *pSize. +// +// This pattern is frequently used when progressively filling a +// large buffer in chunks +// (e.g. when reading from a network interface in a driver). +// +// It is expected that these supplementary annotations would be used inside an +// _At_, like so: +// +// _At_(*ppBuf, _Writes_and_advances_ptr_(*pBufSize)) +// HRESULT WriteChunkOfData(_Inout_ PCHAR* ppBuf, _Inout_ DWORD* pBufSize); +// +#ifndef _Writes_and_advances_ptr_ +#define _Writes_and_advances_ptr_(size) \ + _At_((void*)_Curr_, _Inout_) \ + _At_(_Curr_, \ + _Pre_writable_size_(size) \ + _Post_writable_size_(size) \ + _Post_satisfies_(_Curr_ - _Old_(_Curr_) == size)) \ + _At_(_Old_(_Curr_), \ + _Post_readable_size_(_Old_(size) - size)) +#endif + +#ifndef _Writes_bytes_and_advances_ptr_ +#define _Writes_bytes_and_advances_ptr_(size) \ + _At_((void*)_Curr_, _Inout_) \ + _At_(_Curr_, \ + _Pre_writable_byte_size_(size) \ + _Post_writable_byte_size_(size) \ + _Post_satisfies_(((char*)_Curr_) - ((void*)_Old_(_Curr_)) == size)) \ + _At_(_Old_(_Curr_), \ + _Post_readable_byte_size_(_Old_(size) - size)) +#endif + +// +// Gets the current error code (as returned by GetLastError()), and stores +// in _Curr_ as a postcondition. This is currently approximated by assuming +// that GetLastError() always returns a failed error code. This is not a +// completely accurate approximation, but reasonable. +// +#define _Post_equals_last_error_ _Post_satisfies_(_Curr_ != 0) + +#ifdef __cplusplus +} +#endif + +#ifdef _PREFIX_ +/************************************************************************** +* Defintion of __pfx_assume and __pfx_assert. Thse should be the only +* defintions of these functions. +***************************************************************************/ +#if __cplusplus +extern "C" void __pfx_assert(bool, const char *); +extern "C" void __pfx_assume(bool, const char *); +#else +void __pfx_assert(int, const char *); +void __pfx_assume(int, const char *); +#endif +/************************************************************************** +* Redefintion of __analysis_assume and __analysis_assert for PREFIX build +**************************************************************************/ +#undef __analysis_assume +#undef __analysis_assert +#define __analysis_assume(e) (__pfx_assume(e,"pfx_assume"),__assume(e)); +#define __analysis_assert(e) (__pfx_assert(e,"pfx_assert"),__assume(e)); +#endif /* ifdef _PREFIX_ */ + +/************************************************************************** +* This include should always be the last thing in this file. +* Must avoid redfinitions of macros to workaround rc.exe issues. 
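/* Illustrative only: a hedged sketch of the chunked-write pattern described above.
   The writer declaration is taken from the comment; the caller, its progress check,
   the _Out_writes_bytes_ annotation on it, and the Windows typedefs (PCHAR, DWORD,
   HRESULT, S_OK, SUCCEEDED) are assumptions for the example, not part of this header. */

_At_(*ppBuf, _Writes_and_advances_ptr_(*pBufSize))
HRESULT WriteChunkOfData(_Inout_ PCHAR* ppBuf, _Inout_ DWORD* pBufSize);

/* Hypothetical caller: writes chunks until the buffer is full or the writer stops
   making progress. After each call pCur has been advanced past the written bytes
   and cbLeft holds the remaining unwritten capacity. */
HRESULT FillBuffer(_Out_writes_bytes_(cbTotal) PCHAR pBuf, DWORD cbTotal)
{
    PCHAR pCur = pBuf;
    DWORD cbLeft = cbTotal;
    HRESULT hr = S_OK;
    while (SUCCEEDED(hr) && cbLeft != 0)
    {
        DWORD cbBefore = cbLeft;
        hr = WriteChunkOfData(&pCur, &cbLeft);
        if (cbLeft == cbBefore)
            break; /* no progress; avoid spinning */
    }
    return hr;
}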
+***************************************************************************/ +#if !(defined(RC_INVOKED) || defined(SORTPP_PASS)) +#include +#endif /* if !(defined(RC_INVOKED) || defined(SORTPP_PASS)) */ + +/* + If no SAL 2 appears to have been defined (_Outptr_ is a representative choice) + then we must be operating in a downlevel build environment (such as VS10). + We also test against the compiler version to identify a downlevel environment, + as VS11 is the minimum required for SAL 2 support. + + If we are operating in a downlevel build environment (such as VS10) + we need to undefine the following symbols before including driverspecs.h + or we will end up referencing SAL 2 implementation symbols and cause + build failures. +*/ +#if (!defined(_Outptr_) || _MSC_VER <= 1600) && !( defined( MIDL_PASS ) || defined(__midl) || defined(RC_INVOKED) ) /*IFSTRIP=IGN*/ +#undef __ANNOTATION +#define __ANNOTATION(fun) /* fun */ +#undef __PRIMOP +#define __PRIMOP(type, fun) +#endif /* !defined(_Outptr_) || _MSC_VER <= 1600 */ + +// ROTOR doesn't need driverspecs.h +// #include + +/* + If no SAL 2 appears to have been defined (_Outptr_ is a representative choice) + then we must be operating in a downlevel build environment (such as VS10). + We also test against the compiler version to identify a downlevel environment, + as VS11 is the minimum required for SAL 2 support. + + If we are in a downlevel environment, we can go ahead and include no_sal2.h + to make all of SAL 2 no-ops to ensure no build failures. +*/ +#if (!defined(_Outptr_) || _MSC_VER <= 1600) && !( defined( MIDL_PASS ) || defined(__midl) || defined(RC_INVOKED) ) && !( defined( _SDV_ ) ) /*IFSTRIP=IGN*/ +#include +#endif /* !defined(_Outptr_) || _MSC_VER <= 1600 */ + +#endif /* #ifndef SPECSTRINGS_H */ + + diff --git a/src/coreclr/src/nativeaot/Runtime/unix/specstrings_strict.h b/src/coreclr/src/nativeaot/Runtime/unix/specstrings_strict.h new file mode 100644 index 0000000000000..04d08e21c4c77 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/specstrings_strict.h @@ -0,0 +1,1189 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/************************************************************************* +* This file documents all the macros approved for use in windows source +* code. It includes some experimental macros which should only be used by +* experts. +* +* DO NOT include this file directly. This file is include after +* specstrings.h. So we can undefine every possible old definition including +* private internal macros people should not be using, as well as macros from +* sal.h. Macros are redefined here in a way to cause syntax errors when used +* incorrectly during a normal build when specstrings.h is included and +* __SPECSTRINGS_STRICT_LEVEL is defined. +* +* There are several levels of strictness, each level includes the behavior of +* all previous levels. +* +* 0 - Disable strict checking +* 1 - Break on unapproved macros and misuse of statement +* macros such as __fallthrough (default) +* 2 - Deprecated some old macros that should not be used +* 3 - Use VS 2005 Source Annotation to make sure every macro +* is used in the right context. For example placing __in on a return +* parameter will result in an error. 
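/* A minimal, hypothetical sketch of opting into a stricter level. Per the comment
   above, this header is not included directly; the level would typically be defined
   before specstrings.h is pulled in (or with an equivalent -D compiler switch).
   The chosen value 2 is only an example. */
#define __SPECSTRINGS_STRICT_LEVEL 2   /* level 2 also deprecates the old macros listed later in this header */
#include <specstrings.h>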
+* + +* +************************************************************************/ +#ifndef __SPECSTRINGS_STRICT_LEVEL +#define __SPECSTRINGS_STRICT_LEVEL 1 +#endif +/************************************************************************ +* Introduction +* +* specstrings.h provides a set of annotations to describe how a function uses +* its parameters - the assumptions it makes about them, and the guarantees it +* makes upon finishing. +* +* Annotations must be placed before a function parameter's type or its return +* type. There are two basic classes of common annotations buffer annotations +* and advanced annotations. Buffer annotations describe how functions use +* their pointer parameters, and advanced annotations either describe +* complex/unusual buffer behavior, or provide additional information about a +* parameter that is not otherwise expressible. +* +* Buffer Annotations +* +* The most important annotations in SpecStrings.h provide a consistent way to +* annotate buffer parameters or return values for a function. Each of these +* annotations describes a single buffer (which could be a string, a +* fixed-length or variable-length array, or just a pointer) that the function +* interacts with: where it is, how large it is, how much is initialized, and +* what the function does with it. +* +* The appropriate macro for a given buffer can be constructed using the table +* below. Just pick the appropriate values from each category, and combine +* them together with a leading underscore. Some combinations of values do not +* make sense as buffer annotations. Only meaningful annotations can be added +* to your code; for a list of these, see the buffer annotation definitions +* section. +* +* Only a single buffer annotation should be used for each parameter. +* +* |------------|------------|---------|--------|----------|---------------| +* | Level | Usage | Size | Output | Optional | Parameters | +* |------------|------------|---------|--------|----------|---------------| +* | <> | <> | <> | <> | <> | <> | +* | _deref | _in | _ecount | _full | _opt | (size) | +* | _deref_opt | _out | _bcount | _part | | (size,length) | +* | | _inout | | | | | +* | | | | | | | +* |------------|------------|---------|--------|----------|---------------| +* +* Note: "<>" represents the empty string. +* +* Level: Describes the buffer pointer's level of indirection from the +* parameter or return value 'p'. +* +* <> : p is the buffer pointer. +* _deref : *p is the buffer pointer. p must not be NULL. +* _deref_opt : *p may be the buffer pointer. p may be NULL, in which case the +* rest of the annotation is ignored. +* +* Usage: Describes how the function uses the buffer. +* +* <> : The buffer is not accessed. If used on the return value or with +* _deref, the function will provide the buffer, and it will be uninitialized +* at exit. Otherwise, the caller must provide the buffer. This should only +* be used for alloc and free functions. +* +* _in : The function will only read from the buffer. The caller must provide +* the buffer and initialize it. +* +* _out : The function will only write to the buffer. If used on the return +* value or with _deref, the function will provide the buffer and initialize +* it. Otherwise, the caller must provide the buffer, and the function will +* initialize it. +* +* _inout : The function may freely read from and write to the buffer. The +* caller must provide the buffer and initialize it. If used with _deref, the +* buffer may be reallocated by the function. 
+* +* Size: Describes the total size of the buffer. This may be less than the +* space actually allocated for the buffer, in which case it describes the +* accessible amount. +* +* <> : No buffer size is given. If the type specifies the buffer size (such +* as with LPSTR and LPWSTR), that amount is used. Otherwise, the buffer is +* one element long. Must be used with _in, _out, or _inout. +* +* _ecount : The buffer size is an explicit element count. +* +* _bcount : The buffer size is an explicit byte count. +* +* Output: Describes how much of the buffer will be initialized by the +* function. For _inout buffers, this also describes how much is initialized +* at entry. Omit this category for _in buffers; they must be fully +* initialized by the caller. +* +* <> : The type specifies how much is initialized. For instance, a function +* initializing an LPWSTR must NULL-terminate the string. +* +* _full : The function initializes the entire buffer. +* +* _part : The function initializes part of the buffer, and explicitly +* indicates how much. +* +* Optional: Describes if the buffer itself is optional. +* +* <> : The pointer to the buffer must not be NULL. +* +* _opt : The pointer to the buffer might be NULL. It will be checked before +* being dereferenced. +* +* Parameters: Gives explicit counts for the size and length of the buffer. +* +* <> : There is no explicit count. Use when neither _ecount nor _bcount is +* used. +* +* (size) : Only the buffer's total size is given. Use with _ecount or _bcount +* but not _part. +* +* (size,length) : The buffer's total size and initialized length are +* given. Use with _ecount_part and _bcount_part. +* +* ---------------------------------------------------------------------------- +* Buffer Annotation Examples +* +* LWSTDAPI_(BOOL) StrToIntExA( +* LPCSTR pszString, // No annotation required, const implies __in. +* DWORD dwFlags, +* __out int *piRet // A pointer whose dereference will be filled in. +* ); +* +* void MyPaintingFunction( +* __in HWND hwndControl, // An initialized read-only parameter. +* __in_opt HDC hdcOptional, // An initialized read-only parameter that +* // might be NULL. +* __inout IPropertyStore *ppsStore // An initialized parameter that +* // may be freely used and modified. +* ); +* +* LWSTDAPI_(BOOL) PathCompactPathExA( +* __out_ecount(cchMax) LPSTR pszOut, // A string buffer with cch elements +* // that will be '\0' terminated +* // on exit. +* LPCSTR pszSrc, // No annotation required, +* // const implies __in. +* UINT cchMax, +* DWORD dwFlags +* ); +* +* HRESULT SHLocalAllocBytes( +* size_t cb, +* __deref_bcount(cb) T **ppv // A pointer whose dereference will be set +* // to an uninitialized buffer with cb bytes. +* ); +* +* __inout_bcount_full(cb) : A buffer with cb elements that is fully +* initialized at entry and exit, and may be written to by this function. +* +* __out_ecount_part(count, *countOut) : A buffer with count elements that +* will be partially initialized by this function. The function indicates how +* much it initialized by setting *countOut. 
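/* An additional hedged sketch showing how the columns above compose into macro
   names (level + usage + size + output + optional + parameters). The function,
   its parameters, and the HRESULT return type are invented for illustration. */
HRESULT CopyAndSnapshot(
    __in_ecount(cchSrc) const wchar_t *pszSrc,                   /* read-only, cchSrc elements, initialized by caller */
    size_t cchSrc,
    __out_ecount_part(cchDest, *pcchWritten) wchar_t *pszDest,   /* cchDest capacity; *pcchWritten elements initialized on exit */
    size_t cchDest,
    __out size_t *pcchWritten,
    __deref_opt_out_bcount_full(cbBlob) void **ppBlob,           /* optional; if non-NULL, *ppBlob gets a fully initialized cbBlob-byte buffer provided by the function */
    size_t cbBlob);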
+* +************************************************************************/ + +#if (_MSC_VER >= 1400) && !defined(__midl) && !defined(_PREFAST_) && (__SPECSTRINGS_STRICT_LEVEL > 0) +#pragma once +#include +#define __ecount(size) _SAL_VERSION_CHECK(__ecount) +#define __bcount(size) _SAL_VERSION_CHECK(__bcount) +#define __xcount(size) _SAL_VERSION_CHECK(__xcount) +#define __in _SAL_VERSION_CHECK(__in) +#define __in_ecount(size) _SAL_VERSION_CHECK(__in_ecount) +#define __in_bcount(size) _SAL_VERSION_CHECK(__in_bcount) +#define __in_xcount(size) _SAL_VERSION_CHECK(__in_xcount) +#define __in_z _SAL_VERSION_CHECK(__in_z) +#define __in_ecount_z(size) _SAL_VERSION_CHECK(__in_ecount_z) +#define __in_bcount_z(size) _SAL_VERSION_CHECK(__in_bcount_z) +#define __out _SAL_VERSION_CHECK(__out) +#define __out_ecount(size) _SAL_VERSION_CHECK(__out_ecount) +#define __out_bcount(size) _SAL_VERSION_CHECK(__out_bcount) +#define __out_xcount(size) _SAL_VERSION_CHECK(__out_xcount) +#define __out_ecount_part(size,len) _SAL_VERSION_CHECK(__out_ecount_part) +#define __out_bcount_part(size,len) _SAL_VERSION_CHECK(__out_bcount_part) +#define __out_xcount_part(size,len) _SAL_VERSION_CHECK(__out_xcount_part) +#define __out_ecount_full(size) _SAL_VERSION_CHECK(__out_ecount_full) +#define __out_bcount_full(size) _SAL_VERSION_CHECK(__out_bcount_full) +#define __out_xcount_full(size) _SAL_VERSION_CHECK(__out_xcount_full) +#define __out_z _SAL_VERSION_CHECK(__out_z) +#define __out_ecount_z(size) _SAL_VERSION_CHECK(__out_ecount_z) +#define __out_bcount_z(size) _SAL_VERSION_CHECK(__out_bcount_z) +#define __inout _SAL_VERSION_CHECK(__inout) +#define __inout_ecount(size) _SAL_VERSION_CHECK(__inout_ecount) +#define __inout_bcount(size) _SAL_VERSION_CHECK(__inout_bcount) +#define __inout_xcount(size) _SAL_VERSION_CHECK(__inout_xcount) +#define __inout_ecount_part(size,len) _SAL_VERSION_CHECK(__inout_ecount_part) +#define __inout_bcount_part(size,len) _SAL_VERSION_CHECK(__inout_bcount_part) +#define __inout_xcount_part(size,len) _SAL_VERSION_CHECK(__inout_xcount_part) +#define __inout_ecount_full(size) _SAL_VERSION_CHECK(__inout_ecount_full) +#define __inout_bcount_full(size) _SAL_VERSION_CHECK(__inout_bcount_full) +#define __inout_xcount_full(size) _SAL_VERSION_CHECK(__inout_xcount_full) +#define __inout_z __allowed(on_parameter) +#define __inout_ecount_z(size) __allowed(on_parameter) +#define __inout_bcount_z(size) __allowed(on_parameter) +#define __ecount_opt(size) __allowed(on_parameter) +#define __bcount_opt(size) __allowed(on_parameter) +#define __xcount_opt(size) __allowed(on_parameter) +#define __in_opt _SAL_VERSION_CHECK(__in_opt) +#define __in_ecount_opt(size) _SAL_VERSION_CHECK(__in_ecount_opt) +#define __in_bcount_opt(size) _SAL_VERSION_CHECK(__in_bcount_opt) +#define __in_z_opt __allowed(on_parameter) +#define __in_ecount_z_opt(size) __allowed(on_parameter) +#define __in_bcount_z_opt(size) __allowed(on_parameter) +#define __in_xcount_opt(size) __allowed(on_parameter) +#define __out_opt _SAL_VERSION_CHECK(__out_opt) +#define __out_ecount_opt(size) _SAL_VERSION_CHECK(__out_ecount_opt) +#define __out_bcount_opt(size) _SAL_VERSION_CHECK(__out_bcount_opt) +#define __out_xcount_opt(size) __allowed(on_parameter) +#define __out_ecount_part_opt(size,len) __allowed(on_parameter) +#define __out_bcount_part_opt(size,len) __allowed(on_parameter) +#define __out_xcount_part_opt(size,len) __allowed(on_parameter) +#define __out_ecount_full_opt(size) __allowed(on_parameter) +#define __out_bcount_full_opt(size) __allowed(on_parameter) 
+#define __out_xcount_full_opt(size) __allowed(on_parameter) +#define __out_ecount_z_opt(size) __allowed(on_parameter) +#define __out_bcount_z_opt(size) __allowed(on_parameter) +#define __inout_opt _SAL_VERSION_CHECK(__inout_opt) +#define __inout_ecount_opt(size) _SAL_VERSION_CHECK(__inout_ecount_opt) +#define __inout_bcount_opt(size) _SAL_VERSION_CHECK(__inout_bcount_opt) +#define __inout_xcount_opt(size) _SAL_VERSION_CHECK(__inout_xcount_opt) +#define __inout_ecount_part_opt(size,len) _SAL_VERSION_CHECK(__inout_ecount_part_opt) +#define __inout_bcount_part_opt(size,len) _SAL_VERSION_CHECK(__inout_bcount_part_opt) +#define __inout_xcount_part_opt(size,len) _SAL_VERSION_CHECK(__inout_xcount_part_opt) +#define __inout_ecount_full_opt(size) _SAL_VERSION_CHECK(__inout_ecount_full_opt) +#define __inout_bcount_full_opt(size) _SAL_VERSION_CHECK(__inout_bcount_full_opt) +#define __inout_xcount_full_opt(size) _SAL_VERSION_CHECK(__inout_xcount_full_opt) +#define __inout_z_opt __allowed(on_parameter) +#define __inout_ecount_z_opt(size) __allowed(on_parameter) +#define __inout_ecount_z_opt(size) __allowed(on_parameter) +#define __inout_bcount_z_opt(size) __allowed(on_parameter) +#define __deref_ecount(size) __allowed(on_parameter) +#define __deref_bcount(size) __allowed(on_parameter) +#define __deref_xcount(size) __allowed(on_parameter) +#define __deref_in _SAL_VERSION_CHECK(__deref_in) +#define __deref_in_ecount(size) _SAL_VERSION_CHECK(__deref_in_ecount) +#define __deref_in_bcount(size) _SAL_VERSION_CHECK(__deref_in_bcount) +#define __deref_in_xcount(size) _SAL_VERSION_CHECK(__deref_in_xcount) +#define __deref_out _SAL_VERSION_CHECK(__deref_out) +#define __deref_out_ecount(size) _SAL_VERSION_CHECK(__deref_out_ecount) +#define __deref_out_bcount(size) _SAL_VERSION_CHECK(__deref_out_bcount) +#define __deref_out_xcount(size) _SAL_VERSION_CHECK(__deref_out_xcount) +#define __deref_out_ecount_part(size,len) _SAL_VERSION_CHECK(__deref_out_ecount_part) +#define __deref_out_bcount_part(size,len) _SAL_VERSION_CHECK(__deref_out_bcount_part) +#define __deref_out_xcount_part(size,len) _SAL_VERSION_CHECK(__deref_out_xcount_part) +#define __deref_out_ecount_full(size) _SAL_VERSION_CHECK(__deref_out_ecount_full) +#define __deref_out_bcount_full(size) _SAL_VERSION_CHECK(__deref_out_bcount_full) +#define __deref_out_xcount_full(size) _SAL_VERSION_CHECK(__deref_out_xcount_full) +#define __deref_out_z __allowed(on_parameter) +#define __deref_out_ecount_z(size) __allowed(on_parameter) +#define __deref_out_bcount_z(size) __allowed(on_parameter) +#define __deref_inout _SAL_VERSION_CHECK(__deref_inout) +#define __deref_inout_ecount(size) _SAL_VERSION_CHECK(__deref_inout_ecount) +#define __deref_inout_bcount(size) _SAL_VERSION_CHECK(__deref_inout_bcount) +#define __deref_inout_xcount(size) _SAL_VERSION_CHECK(__deref_inout_xcount) +#define __deref_inout_ecount_part(size,len) __allowed(on_parameter) +#define __deref_inout_bcount_part(size,len) __allowed(on_parameter) +#define __deref_inout_xcount_part(size,len) __allowed(on_parameter) +#define __deref_inout_ecount_full(size) __allowed(on_parameter) +#define __deref_inout_bcount_full(size) __allowed(on_parameter) +#define __deref_inout_xcount_full(size) __allowed(on_parameter) +#define __deref_inout_z __allowed(on_parameter) +#define __deref_inout_ecount_z(size) __allowed(on_parameter) +#define __deref_inout_bcount_z(size) __allowed(on_parameter) +#define __deref_ecount_opt(size) __allowed(on_parameter) +#define __deref_bcount_opt(size) __allowed(on_parameter) +#define 
__deref_xcount_opt(size) __allowed(on_parameter) +#define __deref_in_opt __allowed(on_parameter) +#define __deref_in_opt_out __allowed(on_parameter) +#define __deref_in_ecount_opt(size) __allowed(on_parameter) +#define __deref_in_bcount_opt(size) __allowed(on_parameter) +#define __deref_in_xcount_opt(size) __allowed(on_parameter) +#define __deref_out_opt _SAL_VERSION_CHECK(__deref_out_opt) +#define __deref_out_ecount_opt(size) _SAL_VERSION_CHECK(__deref_out_ecount_opt) +#define __deref_out_bcount_opt(size) _SAL_VERSION_CHECK(__deref_out_bcount_opt) +#define __deref_out_xcount_opt(size) _SAL_VERSION_CHECK(__deref_out_xcount_opt) +#define __deref_out_ecount_part_opt(size,len) _SAL_VERSION_CHECK(__deref_out_ecount_part_opt) +#define __deref_out_bcount_part_opt(size,len) _SAL_VERSION_CHECK(__deref_out_bcount_part_opt) +#define __deref_out_xcount_part_opt(size,len) _SAL_VERSION_CHECK(__deref_out_xcount_part_opt) +#define __deref_out_ecount_full_opt(size) _SAL_VERSION_CHECK(__deref_out_ecount_full_opt) +#define __deref_out_bcount_full_opt(size) _SAL_VERSION_CHECK(__deref_out_bcount_full_opt) +#define __deref_out_xcount_full_opt(size) _SAL_VERSION_CHECK(__deref_out_xcount_full_opt) +#define __deref_out_z_opt __allowed(on_parameter) +#define __deref_out_ecount_z_opt(size) __allowed(on_parameter) +#define __deref_out_bcount_z_opt(size) __allowed(on_parameter) +#define __deref_inout_opt __allowed(on_parameter) +#define __deref_inout_ecount_opt(size) __allowed(on_parameter) +#define __deref_inout_bcount_opt(size) __allowed(on_parameter) +#define __deref_inout_xcount_opt(size) __allowed(on_parameter) +#define __deref_inout_ecount_part_opt(size,len) __allowed(on_parameter) +#define __deref_inout_bcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_inout_xcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_inout_ecount_full_opt(size) __allowed(on_parameter) +#define __deref_inout_bcount_full_opt(size) __allowed(on_parameter) +#define __deref_inout_xcount_full_opt(size) __allowed(on_parameter) +#define __deref_inout_z_opt __allowed(on_parameter) +#define __deref_inout_ecount_z_opt(size) __allowed(on_parameter) +#define __deref_inout_bcount_z_opt(size) __allowed(on_parameter) +#define __deref_opt_ecount(size) __allowed(on_parameter) +#define __deref_opt_bcount(size) __allowed(on_parameter) +#define __deref_opt_xcount(size) __allowed(on_parameter) +#define __deref_opt_in __allowed(on_parameter) +#define __deref_opt_in_ecount(size) __allowed(on_parameter) +#define __deref_opt_in_bcount(size) __allowed(on_parameter) +#define __deref_opt_in_xcount(size) __allowed(on_parameter) +#define __deref_opt_out _SAL_VERSION_CHECK(__deref_opt_out) +#define __deref_opt_out_ecount(size) _SAL_VERSION_CHECK(__deref_opt_out_ecount) +#define __deref_opt_out_bcount(size) _SAL_VERSION_CHECK(__deref_opt_out_bcount) +#define __deref_opt_out_xcount(size) _SAL_VERSION_CHECK(__deref_opt_out_xcount) +#define __deref_opt_out_ecount_part(size,len) __allowed(on_parameter) +#define __deref_opt_out_bcount_part(size,len) __allowed(on_parameter) +#define __deref_opt_out_xcount_part(size,len) __allowed(on_parameter) +#define __deref_opt_out_ecount_full(size) __allowed(on_parameter) +#define __deref_opt_out_bcount_full(size) __allowed(on_parameter) +#define __deref_opt_out_xcount_full(size) __allowed(on_parameter) +#define __deref_opt_inout __allowed(on_parameter) +#define __deref_opt_inout_ecount(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount(size) __allowed(on_parameter) +#define 
__deref_opt_inout_xcount(size) __allowed(on_parameter) +#define __deref_opt_inout_ecount_part(size,len) __allowed(on_parameter) +#define __deref_opt_inout_bcount_part(size,len) __allowed(on_parameter) +#define __deref_opt_inout_xcount_part(size,len) __allowed(on_parameter) +#define __deref_opt_inout_ecount_full(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount_full(size) __allowed(on_parameter) +#define __deref_opt_inout_xcount_full(size) __allowed(on_parameter) +#define __deref_opt_inout_z __allowed(on_parameter) +#define __deref_opt_inout_ecount_z(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount_z(size) __allowed(on_parameter) +#define __deref_opt_ecount_opt(size) __allowed(on_parameter) +#define __deref_opt_bcount_opt(size) __allowed(on_parameter) +#define __deref_opt_xcount_opt(size) __allowed(on_parameter) +#define __deref_opt_in_opt __allowed(on_parameter) +#define __deref_opt_in_ecount_opt(size) __allowed(on_parameter) +#define __deref_opt_in_bcount_opt(size) __allowed(on_parameter) +#define __deref_opt_in_xcount_opt(size) __allowed(on_parameter) +#define __deref_opt_out_opt __allowed(on_parameter) +#define __deref_opt_out_ecount_opt(size) __allowed(on_parameter) +#define __deref_opt_out_bcount_opt(size) __allowed(on_parameter) +#define __deref_opt_out_xcount_opt(size) __allowed(on_parameter) +#define __deref_opt_out_ecount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_out_bcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_out_xcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_out_ecount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_out_bcount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_out_xcount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_out_z_opt __allowed(on_parameter) +#define __deref_opt_out_ecount_z_opt(size) __allowed(on_parameter) +#define __deref_opt_out_bcount_z_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_opt __allowed(on_parameter) +#define __deref_opt_inout_ecount_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_xcount_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_ecount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_inout_bcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_inout_xcount_part_opt(size,len) __allowed(on_parameter) +#define __deref_opt_inout_ecount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_xcount_full_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_z_opt __allowed(on_parameter) +#define __deref_opt_inout_ecount_z_opt(size) __allowed(on_parameter) +#define __deref_opt_inout_bcount_z_opt(size) __allowed(on_parameter) +#define __deref_in_ecount_iterator(size,incr) __allowed(on_parameter) +#define __deref_out_ecount_iterator(size,incr) __allowed(on_parameter) +#define __deref_inout_ecount_iterator(size,incr) __allowed(on_parameter) +#define __deref_realloc_bcount(insize,outsize) __allowed(on_parameter) + +/************************************************************************ +* SAL 2 _Ouptr_ family of annotations +************************************************************************/ + +#define _Outptr_ __allowed(on_parameter) +#define _Outptr_result_maybenull_ __allowed(on_parameter) +#define _Outptr_opt_ __allowed(on_parameter) +#define _Outptr_opt_result_maybenull_ 
__allowed(on_parameter) +#define _Outptr_result_z_ __allowed(on_parameter) +#define _Outptr_opt_result_z_ __allowed(on_parameter) +#define _Outptr_result_maybenull_z_ __allowed(on_parameter) +#define _Outptr_opt_result_maybenull_z_ __allowed(on_parameter) +#define _Outptr_result_nullonfailure_ __allowed(on_parameter) +#define _Outptr_opt_result_nullonfailure_ __allowed(on_parameter) +#define _COM_Outptr_ __allowed(on_parameter) +#define _COM_Outptr_result_maybenull_ __allowed(on_parameter) +#define _COM_Outptr_opt_ __allowed(on_parameter) +#define _COM_Outptr_opt_result_maybenull_ __allowed(on_parameter) +#define _Outptr_result_buffer_(size) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_(size) __allowed(on_parameter) +#define _Outptr_result_buffer_to_(size, count) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_to_(size, count) __allowed(on_parameter) +#define _Outptr_result_buffer_all_(size) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_all_(size) __allowed(on_parameter) +#define _Outptr_result_buffer_maybenull_(size) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_maybenull_(size) __allowed(on_parameter) +#define _Outptr_result_buffer_to_maybenull_(size, count) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_to_maybenull_(size, count) __allowed(on_parameter) +#define _Outptr_result_buffer_all_maybenull_(size) __allowed(on_parameter) +#define _Outptr_opt_result_buffer_all_maybenull_(size) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_(size) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_(size) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_to_(size, count) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_to_(size, count) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_all_(size) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_all_(size) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_maybenull_(size) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_maybenull_(size) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_to_maybenull_(size, count) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_to_maybenull_(size, count) __allowed(on_parameter) +#define _Outptr_result_bytebuffer_all_maybenull_(size) __allowed(on_parameter) +#define _Outptr_opt_result_bytebuffer_all_maybenull_(size) __allowed(on_parameter) + +/************************************************************************ +* Orcas SAL +************************************************************************/ +#define _Deref_out_ _SAL_VERSION_CHECK(_Deref_out_) +#define _Deref_out_opt_ _SAL_VERSION_CHECK(_Deref_out_opt_) +#define _Deref_opt_out_ _SAL_VERSION_CHECK(_Deref_opt_out_) +#define _Deref_opt_out_opt_ _SAL_VERSION_CHECK(_Deref_opt_out_opt_) +#define _In_count_(size) _SAL_VERSION_CHECK(_In_count_) +#define _In_opt_count_(size) _SAL_VERSION_CHECK(_In_opt_count_) +#define _In_bytecount_(size) _SAL_VERSION_CHECK(_In_bytecount_) +#define _In_opt_bytecount_(size) _SAL_VERSION_CHECK(_In_opt_bytecount_) +#define _Out_cap_(size) _SAL_VERSION_CHECK(_Out_cap_) +#define _Out_opt_cap_(size) _SAL_VERSION_CHECK(_Out_opt_cap_) +#define _Out_bytecap_(size) _SAL_VERSION_CHECK(_Out_bytecap_) +#define _Out_opt_bytecap_(size) _SAL_VERSION_CHECK(_Out_opt_bytecap_) +#define _Deref_post_count_(size) _SAL_VERSION_CHECK(_Deref_post_count_) +#define _Deref_post_opt_count_(size) _SAL_VERSION_CHECK(_Deref_post_opt_count_) +#define _Deref_post_bytecount_(size) 
_SAL_VERSION_CHECK(_Deref_post_bytecount_) +#define _Deref_post_opt_bytecount_(size) _SAL_VERSION_CHECK(_Deref_post_opt_bytecount_) +#define _Deref_post_cap_(size) _SAL_VERSION_CHECK(_Deref_post_cap_) +#define _Deref_post_opt_cap_(size) _SAL_VERSION_CHECK(_Deref_post_opt_cap_) +#define _Deref_post_bytecap_(size) _SAL_VERSION_CHECK(_Deref_post_bytecap_) +#define _Deref_post_opt_bytecap_(size) _SAL_VERSION_CHECK(_Deref_post_opt_bytecap_) + +/************************************************************************ +* Advanced Annotations +* +* Advanced annotations describe behavior that is not expressible with the +* regular buffer macros. These may be used either to annotate buffer +* parameters that involve complex or conditional behavior, or to enrich +* existing annotations with additional information. +* +* _At_(expr, annotes) : annotation list annotes applies to target 'expr' +* +* _When_(expr, annotes) : annotation list annotes applies when 'expr' is true +* +* __success(expr) T f() : indicates whether function f succeeded or +* not. If is true at exit, all the function's guarantees (as given +* by other annotations) must hold. If is false at exit, the caller +* should not expect any of the function's guarantees to hold. If not used, +* the function must always satisfy its guarantees. Added automatically to +* functions that indicate success in standard ways, such as by returning an +* HRESULT. +* +* __out_awcount(expr, size) T *p : Pointer p is a buffer whose size may be +* given in either bytes or elements. If is true, this acts like +* __out_bcount. If is false, this acts like __out_ecount. This +* should only be used to annotate old APIs. +* +* __in_awcount(expr, size) T* p : Pointer p is a buffer whose size may be given +* in either bytes or elements. If is true, this acts like +* __in_bcount. If is false, this acts like __in_ecount. This should +* only be used to annotate old APIs. +* +* __nullterminated T* p : Pointer p is a buffer that may be read or written +* up to and including the first '\0' character or pointer. May be used on +* typedefs, which marks valid (properly initialized) instances of that type +* as being null-terminated. +* +* __nullnullterminated T* p : Pointer p is a buffer that may be read or +* written up to and including the first sequence of two '\0' characters or +* pointers. May be used on typedefs, which marks valid instances of that +* type as being double-null terminated. +* +* __reserved T v : Value v must be 0/NULL, reserved for future use. +* +* __checkReturn T f(); : Return value of f must not be ignored by callers +* of this function. +* +* __typefix(ctype) T v : Value v should be treated as an instance of ctype, +* rather than its declared type when considering validity. +* +* __override T f(); : Specify C#-style 'override' behaviour for overriding +* virtual methods. +* +* __callback T f(); : Function f can be used as a function pointer. +* +* __format_string T p : Pointer p is a string that contains % markers in +* the style of printf. +* +* __blocksOn(resource) f(); : Function f blocks on the resource 'resource'. +* +* __fallthrough : Annotates switch statement labels where fall-through is +* desired, to distinguish from forgotten break statements. +* +* __range(low_bnd, up_bnd) int f(): The return from the function "f" must +* be in the inclusive numeric range [low_bnd, up_bnd]. +* +* __in_range(low_bnd, up_bnd) int i : Precondition that integer i must be +* in the inclusive numeric range [low_bnd, up_bnd]. 
+* +* __out_range(low_bnd, up_bnd) int i : Postcondition that integer i must be +* in the inclusive numeric range [low_bnd, up_bnd]. +* +* __deref_in_range(low_bnd, up_bnd) int* pi : Precondition that integer *pi +* must be in the inclusive numeric range [low_bnd, up_bnd]. +* +* __deref_out_range(low_bnd, up_bnd) int* pi : Postcondition that integer +* *pi must be in the inclusive numeric range [low_bnd, up_bnd]. +* +* __deref_inout_range(low_bnd, up_bnd) int* pi : Invariant that the integer +* *pi must be in the inclusive numeric range [low_bnd, up_bnd]. +* +* The first argument of a range macro may also be a C relational operator +* (<,>,!=, ==, <=, >=). +* +* __range(rel_op, j) int f(): Postcondition that "f() rel_op j" must be +* true. Note that j may be a expression known only at runtime. +* +* __in_range(rel_op, j) int i : Precondition that "i rel_op j" must be +* true. Note that j may be a expression known only at runtime. +* +* __out_range(rel_op, j) int i : Postcondition that integer "i rel_op j" +* must be true. Note that j may be a expression known only at runtime. +* +* __deref_in_range(rel_op, j) int *pi : Precondition that "*pi rel_op j" +* must be true. Note that j may be a expression known only at runtime. +* +* __deref_out_range(rel_op, j) int *pi : Postcondition that "*pi rel_op j" +* must be true. Note that j may be a expression known only at runtime. +* +* __deref_inout_range(rel_op, j) int *pi : Invariant that "*pi rel_op j" +* must be true. Note that j may be a expression known only at runtime. +* +* __range_max(a, b) int f(): Postcondition f acts as 'max', returns larger +* of a and b. Note that a and b may be expressions known only at runtime. +* +* __range_min(a, b) int f(): Postcondition f acts as 'min', returns smaller +* of a and b. Note that a and b may be expressions known only at runtime. +* +* __in_bound int i : Precondition that integer i must be bound, but the +* exact range can't be specified at compile time. __in_range should be +* used if the range can be explicitly stated. +* +* __out_bound int i : Postcondition that integer i must be bound, but the +* exact range can't be specified at compile time. __out_range should be +* used if the range can be explicitly stated. +* +* __deref_out_bound int pi : Postcondition that integer *pi must be bound, +* but the exact range can't be specified at compile time. +* __deref_out_range should be used if the range can be explicitly stated. +* +* __assume_bound(expr); : Assume that the expression is bound to some known +* range. This can be used to suppress integer overflow warnings on integral +* expressions that are known to be bound due to reasons not explicit in the +* code. Use as a statement in the body of a function. +* +* __analysis_assume_nulltermianted(expr); : Assume that the expression is +* a null terminated buffer. Use this to suppress tool noise specific to +* nulltermination warnings, and capture deeper invariants tools can not +* discover. +* +* __allocator void f(): Function allocates memory using an integral size +* argument +* +* void myfree(__deallocate(Mem) void *p) : Memory is freed, no longer usable +* upon return, and p may not be null. +* +* void myfree(__deallocate_opt(Mem) void *p) : Memory is freed, no longer +* usable upon return, and p may be null. +* +* void free(__post_invalid void* x): Mark memory as untouchable when +* function returns. 
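/* A hedged sketch of the success and range annotations described above, placed
   before the header's own examples below. The functions, their bounds, and the
   HRESULT/S_OK types are invented for illustration; only the annotation shapes
   come from the documentation above. */
__success(return == S_OK)
__checkReturn
HRESULT ReserveSlots(
    __in_range(1, 64) unsigned int cSlots,               /* precondition: 1 <= cSlots <= 64 */
    __deref_out_range(<=, cSlots) unsigned int *pcUsed); /* postcondition: *pcUsed <= cSlots */

/* Relational-operator form on a return value: the result is never negative. */
__range(>=, 0) int CountPendingItems(void);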
+* +* ---------------------------------------------------------------------------- +* Advanced Annotation Examples +* +* __success(return == TRUE) LWSTDAPI_(BOOL) +* PathCanonicalizeA(__out_ecount(MAX_PATH) LPSTR pszBuf, LPCSTR pszPath); +* // pszBuf is only guaranteed to be null-terminated when TRUE is returned. +* +* // Initialized LPWSTRs are null-terminated strings. +* typedef __nullterminated WCHAR* LPWSTR; +* +* __out_ecount(cch) __typefix(LPWSTR) void *psz; +* // psz is a buffer parameter which will be a null-terminated WCHAR string +* // at exit, and which initially contains cch WCHARs. +* +************************************************************************/ +#define _At_(expr, annotes) __allowed(on_parameter_or_return) +#define _When_(expr, annotes) __allowed(on_parameter_or_return) +#define __success(expr) _SAL_VERSION_CHECK(__success) +#define __out_awcount(expr,size) __allowed(on_parameter) +#define __in_awcount(expr,size) __allowed(on_parameter) +#define __nullterminated _SAL_VERSION_CHECK(__nullterminated) +#define __nullnullterminated _SAL_VERSION_CHECK(__nullnullterminated) +#define __reserved _SAL_VERSION_CHECK(__reserved) +#define __checkReturn _SAL_VERSION_CHECK(__checkReturn) +#define __typefix(ctype) __allowed(on_parameter_or_return) +#define __override __allowed(on_function) +#define __callback __allowed(on_function) +#define __format_string __allowed(on_parameter_or_return) +#define __blocksOn(resource) __allowed(on_function) +#define __fallthrough __allowed(as_statement) +#define __range(lb,ub) __allowed(on_return) +#define __in_range(lb,ub) _SAL_VERSION_CHECK(__in_range) +#define __out_range(lb,ub) _SAL_VERSION_CHECK(__out_range) +#define __deref_in_range(lb,ub) __allowed(on_parameter) +#define __deref_out_range(lb,ub) _SAL_VERSION_CHECK(__deref_out_range) +#define __deref_inout_range(lb,ub) __allowed(on_parameter) +#define __field_range(lb,ub) _SAL_VERSION_CHECK(__field_range) +#define __range_max(a,b) __allowed(on_return) +#define __range_min(a,b) __allowed(on_return) +#define __bound __allowed(on_return) +#define __in_bound __allowed(on_parameter) +#define __out_bound __allowed(on_parameter) +#define __deref_out_bound __allowed(on_parameter) +#define __assume_bound(i) __allowed(as_statement_with_arg(i)) +#define __analysis_assume_nullterminated(x) \ + __allowed(as_statement_with_arg(x)) +#define __allocator __allowed(on_function) +#define __deallocate(kind) __allowed(on_parameter) +#define __deallocate_opt(kind) __allowed(on_parameter) +#define __post_invalid __allowed(on_parameter_or_return) +#define __post_nullnullterminated \ + __allowed(on_parameter_or_return) +/*************************************************************************** +* Expert Macros +***************************************************************************/ +#define __null __allowed(on_typedecl) +#define __notnull __allowed(on_typedecl) +#define __maybenull __allowed(on_typedecl) +#define __exceptthat __allowed(on_typedecl) +/*************************************************************************** +* Macros to classify fields of structures. +* Structure Annotations +* +* The buffer annotations are a convenient way of describing +* relationships between buffers and their size on a function by +* function basis. Very often struct or class data members have similar +* invariants, which can be expressed directly on the type. 
+* +* Similar to our buffer annotations we can summarize all the various +* structure annotations by one choosing an element from each column of +* this table to build a composite annotation. +* +* +--------------------------------------------------+ +* | Selector | Units | Size/Init | Optional | +* |----------+---------+------------------+----------| +* | __field | _ecount | (size) | empty | +* |----------+---------+------------------+----------| +* | __struct | _bcount | _full(size) | _opt | +* |----------+---------+------------------+----------| +* | | _xcount | _part(size,init) | | +* +--------------------------------------------------+ +* +* Note that empty represents the empty string. Sometime arguments need +* to be "floated" to the left to give us a valid annotation name. For +* example the naive combination __field_ecount(size)_opt is actually +* written as __field_ecount_opt(size). Not all possible combinations +* are currently supported or sensible. See specstrings_strict.h for +* the currently supported set. Those that are supported are documented +* below. +* +*Summary of Elements +* +* Selector +* +* __field +* The annotation should only be placed in front +* of data members of structures and classes. The +* data members are pointers to a block of data. +* The annotations describe properties about the +* size of the block of data. This can be used for +* +* __struct +* The annotation should only be placed at the +* beginning of the definition of a structure or +* class. These annotations are used when a struct +* or class is used as a "header" that is +* allocated inline with a block of data and there +* is no apparent field that represents the tail +* end of the structure. +* +* Units +* +* _ecount +* All size and initialization values are in terms +* of elements of the appropriate type +* +* _bcount +* All size and initialization values are in terms +* of raw byte sizes. +* +* _xcount +* The size or initialization values cannot be +* properly expressed as a simple byte or element +* count, and instead a place holder is used to +* document the relationship. +* +* Size/Init +* All the size/init expressions can contain references to +* other fields in the struct or class. +* +* (size) +* The size of the buffer is determined by the +* expression size. Unless, the type of the buffer +* provides more information nothing is know about +* how much of this data is initialized. For +* example, if the data member happens to be a +* string type such as LPSTR. It is assumed that +* the data is initialized to the first '\0'. +* +* _full(size) +* The size of the buffer is determined by the +* expression size and all the data in the buffer +* is guaranteed to be initialized. +* +* _part(size,init) +* The size of the buffer is determined by the +* expression size and all the data in the buffer +* is guaranteed to be initialized up to init +* elements or bytes. 
+* +* Optional +* +* empty +* The pointer to the block of memory is never +* NULL +* +* _opt +* The pointer to the block of memory is may be +* NULL +* +* +* // Basic Usage of Struct Annotations +* #include +* #include +* struct buf_s { +* int sz; +* __field_bcount_full(sz) +* char *buf; +* }; +* void InitBuf(__out struct *buf_s b,int sz) { +* b->buf = calloc(sz,sizeof(char)); +* b->sz = sz; +* } +* void WriteBuf(__in FILE *fp,__in struct *buf_s b) { +* fwrite(b->buf,b->sz,sizeof(char),fp); +* } +* void ReadBuf(__in FILE *fp,__inout struct *buf_s b) { +* fread(b->buf,b->sz,sizeof(char),fp); +* } +* +* +* +* // Inline Allocated Buffer +* struct buf_s { +* int sz; +* __field_bcount(sz) +* char buf[1]; +* }; +* void WriteBuf(__in FILE *fp,__in struct *buf_s b) { +* fwrite(&(b->buf),b->sz,sizeof(char),fp); +* } +* void ReadBuf(__in FILE *fp,__inout struct *buf_s b) { +* fread(&(b->buf),b->sz,sizeof(char),fp); +* } +* +* +* +* // Embedded Header Structure +* __struct_bcount(sz) +* struct buf_s { +* int sz; +* }; +* void WriteBuf(__in FILE *fp,__in struct *buf_s b) { +* fwrite(&b,b->sz,sizeof(char),fp); +* } +* void ReadBuf(__in FILE *fp,__inout struct *buf_s b) { +* fread(&b,b->sz,sizeof(char),fp); +* } +* +* +****************************************************************************/ +#define __field_ecount(size) _SAL_VERSION_CHECK(__field_ecount) +#define __field_bcount(size) _SAL_VERSION_CHECK(__field_bcount) +#define __field_xcount(size) __allowed(on_field) +#define __field_ecount_opt(size) __allowed(on_field) +#define __field_bcount_opt(size) __allowed(on_field) +#define __field_xcount_opt(size) __allowed(on_field) +#define __field_ecount_part(size,init) __allowed(on_field) +#define __field_bcount_part(size,init) __allowed(on_field) +#define __field_xcount_part(size,init) __allowed(on_field) +#define __field_ecount_part_opt(size,init) __allowed(on_field) +#define __field_bcount_part_opt(size,init) __allowed(on_field) +#define __field_xcount_part_opt(size,init) __allowed(on_field) +#define __field_ecount_full(size) __allowed(on_field) +#define __field_bcount_full(size) __allowed(on_field) +#define __field_xcount_full(size) __allowed(on_field) +#define __field_ecount_full_opt(size) __allowed(on_field) +#define __field_bcount_full_opt(size) __allowed(on_field) +#define __field_xcount_full_opt(size) __allowed(on_field) +#define __field_nullterminated __allowed(on_field) +#define __struct_bcount(size) __allowed(on_struct) +#define __struct_xcount(size) __allowed(on_struct) + +/*************************************************************************** +* Macros to classify the entrypoints and indicate their category. +* +* Pre-defined control point categories include: RPC, KERNEL, GDI. +* +* Pre-defined control point macros include: +* __rpc_entry, __kernel_entry, __gdi_entry. +***************************************************************************/ +#define __control_entrypoint(category) __allowed(on_function) +#define __rpc_entry __allowed(on_function) +#define __kernel_entry __allowed(on_function) +#define __gdi_entry __allowed(on_function) + +/*************************************************************************** +* Macros to track untrusted data and their validation. 
The list of untrusted +* sources include: +* +* FILE - File reading stream or API +* NETWORK - Socket readers +* INTERNET - WinInet and WinHttp readers +* USER_REGISTRY - HKCU portions of the registry +* USER_MODE - Parameters to kernel entry points +* RPC - Parameters to RPC entry points +* DRIVER - Device driver +***************************************************************************/ +#define __in_data_source(src_sym) __allowed(on_parameter) +#define __out_data_source(src_sym) __allowed(on_parameter) +#define __field_data_source(src_sym) __allowed(on_field) +#define __this_out_data_source(src_syn) __allowed(on_function) + +/************************************************************************** +* Macros to tag file parsing code. Predefined formats include: +* PNG - Portable Network Graphics +* JPEG - Joint Photographic Experts Group +* BMP - Bitmap +* RC_BMP - Resource bitmap +* WMF - Windows Metafile +* EMF - Windows Enhanced Metafile +* GIF - Graphics Interchange Format +* MIME_TYPE - MIME type from header tokens +* MAIL_MONIKER - MAIL information refered by URL moniker +* HTML - HyperText Markup Language +* WMPHOTO - Windows media photo +* OE_VCARD - Outlook Express virtual card +* OE_CONTACT - Outlook Express contact +* MIDI - Musical Instrument Digital Interface +* LDIF - LDAP Data Interchange Format +* AVI - Audio Visual Interchange +* ACM - Audio Compression Manager +**************************************************************************/ +#define __out_validated(filetype_sym) __allowed(on_parameter) +#define __this_out_validated(filetype_sym) __allowed(on_function) +#define __file_parser(filetype_sym) __allowed(on_function) +#define __file_parser_class(filetype_sym) __allowed(on_struct) +#define __file_parser_library(filetype_sym) __allowed(as_global_decl) + +/*************************************************************************** +* Macros to track the code content in the file. The type of code +* contents currently tracked: +* +* NDIS_DRIVER - NDIS Device driver +***************************************************************************/ +#define __source_code_content(codetype_sym) __allowed(as_global_decl) + +/*************************************************************************** +* Macros to track the code content in the class. The type of code +* contents currently tracked: +* +* DCOM - Class implementing DCOM +***************************************************************************/ +#define __class_code_content(codetype_sym) __allowed(on_struct) + +/************************************************************************* +* Macros to tag encoded function pointers +**************************************************************************/ +#define __encoded_pointer +#define __encoded_array +#define __field_encoded_pointer __allowed(on_field) +#define __field_encoded_array __allowed(on_field) + +#define __transfer(formal) __allowed(on_parameter_or_return) +#define __assume_validated(exp) __allowed(as_statement_with_arg(exp)) + +/************************************************************************* +* __analysis_assume(expr) : Expert macro use only when directed. Use this to +* tell static analysis tools like PREfix and PREfast about a non-coded +* assumption that you wish the tools to assume. The assumption will be +* understood by those tools. By default there is no dynamic checking or +* static checking of the assumption in any build. +* +* To obtain dynamic checking wrap this macro in your local version of a debug +* assert. 
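/* One hypothetical way to follow the advice above about wrapping this macro in a
   local debug assert. The wrapper name, the _DEBUG convention, and the use of the
   C assert macro are assumptions for the example, not something this header
   provides; as noted just below, the expression should not contain function calls. */
#include <assert.h>

#ifdef _DEBUG
#define MY_ANALYSIS_ASSUME(e)  do { assert(e); __analysis_assume(e); } while (0)
#else
#define MY_ANALYSIS_ASSUME(e)  __analysis_assume(e)
#endif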
+* Please do not put function calls in the expression because this is not +* supported by all tools: +* __analysis_assume(GetObject () != NULL); // DO NOT DO THIS +* +*************************************************************************/ +#define __analysis_assume(expr) __allowed(as_statement_with_arg(expr)) +#define __analysis_assert(expr) __allowed(as_statement_with_arg(expr)) + +/************************************************************************* +* __analysis_hint(hint_sym) : Expert macro use only when +* directed. Use this to influence certain analysis heuristics +* used by the tools. These hints do not describe the semantics +* of functions but simply direct the tools to act in a certain +* way. +* +* Current hints that are supported are: +* +* INLINE - inline this function during analysis overrides any +* default heuristics +* NOINLINE - do not inline this function during analysis overrides +* and default heuristics +*************************************************************************/ +#define __analysis_hint(hint) __allowed(on_function) + +/************************************************************************* +* Macros to encode abstract properties of values. Used by SALadt.h +*************************************************************************/ +#define __type_has_adt_prop(adt,prop) __allowed(on_typdecl) +#define __out_has_adt_prop(adt,prop) __allowed(on_parameter) +#define __out_not_has_adt_prop(adt,prop) __allowed(on_parameter) +#define __out_transfer_adt_prop(arg) __allowed(on_parameter) +#define __out_has_type_adt_props(typ) __allowed(on_parameter) + +/************************************************************************* +* Macros used by Prefast for Drivers +* +* __possibly_notnullterminated : +* +* Used for return values of parameters or functions that do not +* guarantee nulltermination in all cases. +* +*************************************************************************/ +#define __possibly_notnullterminated __allowed(on_parameter_or_return) + +/************************************************************************* +* Advanced macros +* +* __volatile +* The __volatile annotation identifies a global variable or +* structure field that: +* 1) is not declared volatile; +* 2) is accessed concurrently by multiple threads. +* +* The __deref_volatile annotation identifies a global variable +* or structure field that stores a pointer to some data that: +* 1) is not declared volatile; +* 2) is accessed concurrently by multiple threads. +* +* Prefast uses these annotations to find patterns of code that +* may result in unexpected re-fetching of the global variable +* into a local variable. +* +* We also provide two complimentary annotations __nonvolatile +* and __deref_nonvolatile that could be used to suppress Prefast +* +* re-fetching warnings on variables that are known either: +* 1) not to be in danger of being re-fetched or, +* 2) not to lead to incorrect results if they are re-fetched +* +*************************************************************************/ +#define __volatile __allowed(on_global_or_field) +#define __deref_volatile __allowed(on_global_or_field) +#define __nonvolatile __allowed(on_global_or_field) +#define __deref_nonvolatile __allowed(on_global_or_field) + +/************************************************************************* +* Macros deprecated with strict level greater then 1. 
+**************************************************************************/ +#if (__SPECSTRINGS_STRICT_LEVEL > 1) +/* Must come before macro defintions */ +#pragma deprecated(__in_nz) +#pragma deprecated(__in_ecount_nz) +#pragma deprecated(__in_bcount_nz) +#pragma deprecated(__out_nz) +#pragma deprecated(__out_nz_opt) +#pragma deprecated(__out_ecount_nz) +#pragma deprecated(__out_bcount_nz) +#pragma deprecated(__inout_nz) +#pragma deprecated(__inout_ecount_nz) +#pragma deprecated(__inout_bcount_nz) +#pragma deprecated(__in_nz_opt) +#pragma deprecated(__in_ecount_nz_opt) +#pragma deprecated(__in_bcount_nz_opt) +#pragma deprecated(__out_ecount_nz_opt) +#pragma deprecated(__out_bcount_nz_opt) +#pragma deprecated(__inout_nz_opt) +#pragma deprecated(__inout_ecount_nz_opt) +#pragma deprecated(__inout_bcount_nz_opt) +#pragma deprecated(__deref_out_nz) +#pragma deprecated(__deref_out_ecount_nz) +#pragma deprecated(__deref_out_bcount_nz) +#pragma deprecated(__deref_inout_nz) +#pragma deprecated(__deref_inout_ecount_nz) +#pragma deprecated(__deref_inout_bcount_nz) +#pragma deprecated(__deref_out_nz_opt) +#pragma deprecated(__deref_out_ecount_nz_opt) +#pragma deprecated(__deref_out_bcount_nz_opt) +#pragma deprecated(__deref_inout_nz_opt) +#pragma deprecated(__deref_inout_ecount_nz_opt) +#pragma deprecated(__deref_inout_bcount_nz_opt) +#pragma deprecated(__deref_opt_inout_nz) +#pragma deprecated(__deref_opt_inout_ecount_nz) +#pragma deprecated(__deref_opt_inout_bcount_nz) +#pragma deprecated(__deref_opt_out_nz_opt) +#pragma deprecated(__deref_opt_out_ecount_nz_opt) +#pragma deprecated(__deref_opt_out_bcount_nz_opt) +#pragma deprecated(__deref_opt_inout_nz_opt) +#pragma deprecated(__deref_opt_inout_ecount_nz_opt) +#pragma deprecated(__deref_opt_inout_bcount_nz_opt) +#pragma deprecated(__deref) +#pragma deprecated(__pre) +#pragma deprecated(__post) +#pragma deprecated(__readableTo) +#pragma deprecated(__writableTo) +#pragma deprecated(__maybevalid) +#pragma deprecated(__data_entrypoint) +#pragma deprecated(__inexpressible_readableTo) +#pragma deprecated(__readonly) +#pragma deprecated(__byte_writableTo) +#pragma deprecated(__byte_readableTo) +#pragma deprecated(__elem_readableTo) +#pragma deprecated(__elem_writableTo) +#pragma deprecated(__valid) +#pragma deprecated(__notvalid) +#pragma deprecated(__refparam) +#pragma deprecated(__precond) +#endif +/* Define soon to be deprecated macros to nops. 
*/ +#define __in_nz +#define __in_ecount_nz(size) +#define __in_bcount_nz(size) +#define __out_nz +#define __out_nz_opt +#define __out_ecount_nz(size) +#define __out_bcount_nz(size) +#define __inout_nz +#define __inout_ecount_nz(size) +#define __inout_bcount_nz(size) +#define __in_nz_opt +#define __in_ecount_nz_opt(size) +#define __in_bcount_nz_opt(size) +#define __out_ecount_nz_opt(size) +#define __out_bcount_nz_opt(size) +#define __inout_nz_opt +#define __inout_ecount_nz_opt(size) +#define __inout_bcount_nz_opt(size) +#define __deref_out_nz +#define __deref_out_ecount_nz(size) +#define __deref_out_bcount_nz(size) +#define __deref_inout_nz +#define __deref_inout_ecount_nz(size) +#define __deref_inout_bcount_nz(size) +#define __deref_out_nz_opt +#define __deref_out_ecount_nz_opt(size) +#define __deref_out_bcount_nz_opt(size) +#define __deref_inout_nz_opt +#define __deref_inout_ecount_nz_opt(size) +#define __deref_inout_bcount_nz_opt(size) +#define __deref_opt_inout_nz +#define __deref_opt_inout_ecount_nz(size) +#define __deref_opt_inout_bcount_nz(size) +#define __deref_opt_out_nz_opt +#define __deref_opt_out_ecount_nz_opt(size) +#define __deref_opt_out_bcount_nz_opt(size) +#define __deref_opt_inout_nz_opt +#define __deref_opt_inout_ecount_nz_opt(size) +#define __deref_opt_inout_bcount_nz_opt(size) +#define __deref +#define __pre +#define __post +#define __readableTo(count) +#define __writableTo(count) +#define __maybevalid +#define __inexpressible_readableTo(string) +#define __data_entrypoint(category) +#define __readonly +#define __byte_writableTo(count) +#define __byte_readableTo(count) +#define __elem_readableTo(count) +#define __elem_writableTo(count) +#define __valid +#define __notvalid +#define __refparam +#define __precond(condition) + +/************************************************************************* +* Definitions to force a compile error when macros are used improperly. +* Relies on VS 2005 source annotations. +*************************************************************************/ +#if !defined(_MSC_EXTENSIONS) && !defined(_PREFAST_) && !defined(OACR) +#define __allowed(p) /* nothing */ +#else +#define __allowed(p) __$allowed_##p +#define __$allowed_as_global_decl /* empty */ +#define __$allowed_as_statement_with_arg(x) \ + __pragma(warning(push)) __pragma(warning(disable : 4548)) \ + do {__noop(x);} while((0,0) __pragma(warning(pop)) ) +#define __$allowed_as_statement __$allowed_as_statement_with_arg(1) + +/************************************************************************** +* This should go away. It's only for __success which we should split into. +* __success and __typdecl_sucess +***************************************************************************/ +#define __$allowed_on_function_or_typedecl /* empty */ +#if (__SPECSTRINGS_STRICT_LEVEL == 1) || (__SPECSTRINGS_STRICT_LEVEL == 2) +#define __$allowed_on_typedecl /* empty */ +#define __$allowed_on_return /* empty */ +#define __$allowed_on_parameter /* empty */ +#define __$allowed_on_function /* empty */ +#define __$allowed_on_struct /* empty */ +#define __$allowed_on_field /* empty */ +#define __$allowed_on_parameter_or_return /* empty */ +#define __$allowed_on_global_or_field /* empty */ +#elif __SPECSTRINGS_STRICT_LEVEL == 3 +#define __$allowed_on_typedecl /* empty */ +/* Define dummy source attributes. 
Still needs more testing */ +#define __$allowed_on_return [returnvalue: OnReturnOnly] +#define __$allowed_on_parameter [OnParameterOnly] +#define __$allowed_on_function [method: OnFunctionOnly] +#define __$allowed_on_struct [OnStructOnly] +#define __$allowed_on_field [OnFieldOnly] +#define __$allowed_on_parameter_or_return [OnParameterOrReturnOnly] +#define __$allowed_on_global_or_field /* empty */ +#pragma push_macro( "DECL_SA" ) +#pragma push_macro( "SA" ) +#ifdef __cplusplus +#define SA(x) x +#define DECL_SA(name,loc) \ + [repeatable] \ + [source_annotation_attribute( loc )] \ + struct name##Attribute { name##Attribute(); const char* ignored; }; +#else +#define SA(x) SA_##x +#define DECL_SA(name,loc) \ + [source_annotation_attribute( loc )] \ + struct name { const char* ignored; };\ + typedef struct name name; +#endif /* #endif __cplusplus */ +DECL_SA(OnParameterOnly,SA(Parameter)); +DECL_SA(OnReturnOnly,SA(ReturnValue)); +DECL_SA(OnFunctionOnly,SA(Method)); +DECL_SA(OnStructOnly,SA(Struct)); +DECL_SA(OnFieldOnly,SA(Field)); +DECL_SA(OnParameterOrReturnOnly,SA(Parameter) | SA(ReturnValue)); +#pragma pop_macro( "SA" ) +#pragma pop_macro( "DECL_SA" ) +#endif +#endif +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacros.inc new file mode 100644 index 0000000000000..ef6d393fd248b --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacros.inc @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#define INVALIDGCVALUE 0xCCCCCCCD + +#if defined(__APPLE__) +#define C_FUNC(name) _##name +#define EXTERNAL_C_FUNC(name) C_FUNC(name) +#define LOCAL_LABEL(name) L##name +#else +#define C_FUNC(name) name +#define EXTERNAL_C_FUNC(name) C_FUNC(name)@plt +#define LOCAL_LABEL(name) .L##name +#endif + +#if defined(__APPLE__) +#define C_PLTFUNC(name) _##name +#else +#define C_PLTFUNC(name) name@PLT +#endif + +.macro END_PROLOGUE +.endm + +.macro SETALIAS New, Old + .equiv \New, \Old +.endm + +#if defined(HOST_AMD64) +#include "unixasmmacrosamd64.inc" +#elif defined(HOST_ARM) +#include "unixasmmacrosarm.inc" +#elif defined(HOST_ARM64) +#include "unixasmmacrosarm64.inc" +#elif defined(HOST_X86) +#include "unixasmmacrosx86.inc" +#endif diff --git a/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosamd64.inc new file mode 100644 index 0000000000000..b45c31007f0f1 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -0,0 +1,351 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#define C_VAR(Name) rip + C_FUNC(Name) + +.macro NESTED_ENTRY Name, Section, Handler + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler +#if defined(__APPLE__) + .cfi_personality 0x9b, C_FUNC(\Handler) // 0x9b == DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 +#else + .cfi_personality 0, C_FUNC(\Handler) // 0 == DW_EH_PE_absptr +#endif + .endif +.endm + +.macro NESTED_END Name, Section + LEAF_END \Name, \Section +#if defined(__APPLE__) + .set LOCAL_LABEL(\Name\()_Size), . 
- C_FUNC(\Name) + .section __LD,__compact_unwind,regular,debug + .quad C_FUNC(\Name) + .long LOCAL_LABEL(\Name\()_Size) + .long 0x04000000 # DWARF + .quad 0 + .quad 0 +#endif +.endm + +.macro PATCH_LABEL Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_ENTRY Name, Section + .global C_FUNC(\Name) +#if defined(__APPLE__) + .text +#else + .global C_FUNC(_\Name) + .type \Name, %function +#endif +C_FUNC(\Name): + .cfi_startproc +.endm + +.macro LEAF_END Name, Section +#if !defined(__APPLE__) + .size \Name, .-\Name +#endif + .cfi_endproc +.endm + +.macro push_nonvol_reg Register + push \Register + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset \Register, 0 +.endm + +.macro pop_nonvol_reg Register + pop \Register + .cfi_adjust_cfa_offset -8 + .cfi_restore \Register +.endm + +.macro alloc_stack Size +.att_syntax + lea -(\Size)(%rsp), %rsp +.intel_syntax noprefix + .cfi_adjust_cfa_offset (\Size) +.endm + +.macro free_stack Size +.att_syntax + lea (\Size)(%rsp), %rsp +.intel_syntax noprefix + .cfi_adjust_cfa_offset -(\Size) +.endm + +.macro set_cfa_register Reg, Offset + .cfi_def_cfa_register \Reg + .cfi_def_cfa_offset \Offset +.endm + +.macro save_reg_postrsp Reg, Offset + __Offset = \Offset + mov qword ptr [rsp + __Offset], \Reg + .cfi_rel_offset \Reg, __Offset +.endm + +.macro restore_reg Reg, Offset + __Offset = \Offset + mov \Reg, [rsp + __Offset] + .cfi_restore \Reg +.endm + +.macro save_xmm128_postrsp Reg, Offset + __Offset = \Offset + movdqa xmmword ptr [rsp + __Offset], \Reg + // NOTE: We cannot use ".cfi_rel_offset \Reg, __Offset" here, + // the xmm registers are not supported by the libunwind +.endm + +.macro restore_xmm128 Reg, ofs + __Offset = \ofs + movdqa \Reg, xmmword ptr [rsp + __Offset] + // NOTE: We cannot use ".cfi_restore \Reg" here, + // the xmm registers are not supported by the libunwind + +.endm + +.macro RESET_FRAME_WITH_RBP + + mov rsp, rbp + set_cfa_register rsp, 16 + pop_nonvol_reg rbp + .cfi_same_value rbp + +.endm + +.macro PUSH_CALLEE_SAVED_REGISTERS + + push_register rbp + push_register rbx + push_register r15 + push_register r14 + push_register r13 + push_register r12 + +.endm + +.macro POP_CALLEE_SAVED_REGISTERS + + pop_nonvol_reg r12 + pop_nonvol_reg r13 + pop_nonvol_reg r14 + pop_nonvol_reg r15 + pop_nonvol_reg rbx + pop_nonvol_reg rbp + +.endm + +.macro push_register Reg + push \Reg + .cfi_adjust_cfa_offset 8 +.endm + +.macro push_imm imm +.att_syntax + push $\imm +.intel_syntax noprefix + .cfi_adjust_cfa_offset 8 +.endm + +.macro push_eflags + pushfq + .cfi_adjust_cfa_offset 8 +.endm + +.macro push_argument_register Reg + push_register \Reg +.endm + +.macro PUSH_ARGUMENT_REGISTERS + + push_argument_register r9 + push_argument_register r8 + push_argument_register rcx + push_argument_register rdx + push_argument_register rsi + push_argument_register rdi + +.endm + +.macro pop_register Reg + pop \Reg + .cfi_adjust_cfa_offset -8 +.endm + +.macro pop_eflags + popfq + .cfi_adjust_cfa_offset -8 +.endm + +.macro pop_argument_register Reg + pop_register \Reg +.endm + +.macro POP_ARGUMENT_REGISTERS + + pop_argument_register rdi + pop_argument_register rsi + pop_argument_register rdx + pop_argument_register rcx + pop_argument_register r8 + pop_argument_register r9 + +.endm + +#define SIZEOF_FP_REGS 0x80 + +.macro SAVE_FLOAT_ARGUMENT_REGISTERS ofs + + save_xmm128_postrsp xmm0, \ofs + save_xmm128_postrsp xmm1, \ofs + 0x10 + save_xmm128_postrsp xmm2, \ofs + 0x20 + save_xmm128_postrsp 
xmm3, \ofs + 0x30 + save_xmm128_postrsp xmm4, \ofs + 0x40 + save_xmm128_postrsp xmm5, \ofs + 0x50 + save_xmm128_postrsp xmm6, \ofs + 0x60 + save_xmm128_postrsp xmm7, \ofs + 0x70 + +.endm + +.macro RESTORE_FLOAT_ARGUMENT_REGISTERS ofs + + restore_xmm128 xmm0, \ofs + restore_xmm128 xmm1, \ofs + 0x10 + restore_xmm128 xmm2, \ofs + 0x20 + restore_xmm128 xmm3, \ofs + 0x30 + restore_xmm128 xmm4, \ofs + 0x40 + restore_xmm128 xmm5, \ofs + 0x50 + restore_xmm128 xmm6, \ofs + 0x60 + restore_xmm128 xmm7, \ofs + 0x70 + +.endm + +.macro EXPORT_POINTER_TO_ADDRESS Name + +// NOTE: The label is intentionally left as 2 - otherwise on OSX 0b or 1b will be incorrectly interpreted as binary integers + +2: + + .data + .align 8 +C_FUNC(\Name): + .quad 2b + .global C_FUNC(\Name) + .text + +.endm + +// +// CONSTANTS -- INTEGER +// +#define TSF_Attached 0x01 +#define TSF_SuppressGcStress 0x08 +#define TSF_DoNotTriggerGc 0x10 + +// +// Rename fields of nested structs +// +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +#define OFFSETOF__Thread__m_alloc_context__alloc_limit OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + +// GC type flags +#define GC_ALLOC_FINALIZE 1 + +// Note: these must match the defs in PInvokeTransitionFrameFlags +#define PTFF_SAVE_RBX 0x00000001 +#define PTFF_SAVE_R12 0x00000010 +#define PTFF_SAVE_R13 0x00000020 +#define PTFF_SAVE_R14 0x00000040 +#define PTFF_SAVE_R15 0x00000080 +#define PTFF_SAVE_ALL_PRESERVED 0x000000F1 // NOTE: RBP is not included in this set! +#define PTFF_SAVE_RSP 0x00008000 +#define PTFF_SAVE_RAX 0x00000100 // RAX is saved if it contains a GC ref and we're in hijack handler +#define PTFF_SAVE_ALL_SCRATCH 0x00007F00 +#define PTFF_RAX_IS_GCREF 0x00010000 // iff PTFF_SAVE_RAX: set -> eax is Object, clear -> eax is scalar +#define PTFF_RAX_IS_BYREF 0x00020000 // iff PTFF_SAVE_RAX: set -> eax is ByRef, clear -> eax is Object or scalar +#define PTFF_THREAD_ABORT 0x00040000 // indicates that ThreadAbortException should be thrown when returning from the transition + +// These must match the TrapThreadsFlags enum +#define TrapThreadsFlags_None 0 +#define TrapThreadsFlags_AbortInProgress 1 +#define TrapThreadsFlags_TrapThreads 2 + +.macro INLINE_GET_TLS_VAR Var + .att_syntax +#if defined(__APPLE__) + movq _\Var@TLVP(%rip), %rdi + callq *(%rdi) +#else + leaq \Var@TLSLD(%rip), %rdi + callq __tls_get_addr@PLT + addq $\Var@DTPOFF, %rax +#endif + .intel_syntax noprefix +.endm + + +.macro INLINE_GETTHREAD + // Inlined version of call C_FUNC(RhpGetThread) + INLINE_GET_TLS_VAR tls_CurrentThread +.endm + +.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 + // + // Thread::Unhijack() + // + mov \trashReg1, [\threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress] + cmp \trashReg1, 0 + je 1f + + mov \trashReg2, [\threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + mov [\trashReg2], \trashReg1 + mov qword ptr [\threadReg + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 + mov qword ptr [\threadReg + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 + +1: +.endm + +DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP + +.macro PUSH_COOP_PINVOKE_FRAME trashReg + push_nonvol_reg rbp // push RBP frame + mov rbp, rsp + lea \trashReg, [rsp + 0x10] + push_register \trashReg // save caller's RSP + push_nonvol_reg r15 // save preserved registers + push_nonvol_reg r14 // .. + push_nonvol_reg r13 // .. + push_nonvol_reg r12 // .. 
+ push_nonvol_reg rbx // .. + push_imm DEFAULT_FRAME_SAVE_FLAGS // save the register bitmask + push_register \trashReg // Thread * (unused by stackwalker) + mov \trashReg, [rsp + 8*8] // Find and save the callers RBP + push_register \trashReg + mov \trashReg, [rsp + 10*8] // Find and save the return address + push_register \trashReg + lea \trashReg, [rsp] // trashReg == address of frame +.endm + +.macro POP_COOP_PINVOKE_FRAME + pop_register r10 // discard RIP + pop_nonvol_reg rbp // restore RBP + pop_register r10 // discard thread + pop_register r10 // discard bitmask + pop_nonvol_reg rbx + pop_nonvol_reg r12 + pop_nonvol_reg r13 + pop_nonvol_reg r14 + pop_nonvol_reg r15 + pop_register r10 // discard caller RSP + pop_register r10 // discard RBP frame +.endm diff --git a/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm.inc b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm.inc new file mode 100644 index 0000000000000..73a9968f3b8d6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm.inc @@ -0,0 +1,306 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// CONSTANTS -- INTEGER +// + +// GC type flags +#define GC_ALLOC_FINALIZE 1 +#define GC_ALLOC_ALIGN8_BIAS 4 +#define GC_ALLOC_ALIGN8 8 + +#define TSF_Attached 0x01 +#define TSF_SuppressGcStress 0x08 +#define TSF_DoNotTriggerGc 0x10 + +#define PTFF_SAVE_ALL_PRESERVED 0x0000007F // NOTE: R11 is not included in this set! +#define PTFF_SAVE_SP 0x00000100 +#define DEFAULT_FRAME_SAVE_FLAGS (PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP) + +// These must match the TrapThreadsFlags enum +#define TrapThreadsFlags_None 0 +#define TrapThreadsFlags_AbortInProgress 1 +#define TrapThreadsFlags_TrapThreads 2 + +// Rename fields of nested structs +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) + +// GC minimal sized object. We use this to switch between 4 and 8 byte alignment in the GC heap (see AllocFast.asm). 
+#define SIZEOF__MinObject 12 + +// Maximum subsection number in .text section +#define MAX_NUMBER_SUBSECTION_TEXT 0x2000 + +.macro NESTED_ENTRY Name, Section, Handler + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler + .personality C_FUNC(\Handler) + .endif +.endm + +.macro NESTED_END Name, Section + LEAF_END \Name, \Section +.endm + +.macro PATCH_LABEL Name + .thumb_func + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_ENTRY Name, Section + .thumb_func + .global C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): + .fnstart +.endm + +.macro LEAF_END Name, Section + .size \Name, .-\Name + .fnend +.endm + +.macro PREPARE_EXTERNAL_VAR Name, HelperReg + ldr \HelperReg, [pc, #C_FUNC(\Name)@GOTPCREL] +.endm + +.macro push_nonvol_reg Register + push \Register + .save \Register +.endm + +.macro pop_nonvol_reg Register + pop \Register +.endm + +.macro vpush_nonvol_reg Register + vpush \Register + .vsave \Register +.endm + +.macro vpop_nonvol_reg Register + vpop \Register +.endm + +.macro alloc_stack Size + sub sp, sp, (\Size) + .pad #(\Size) +.endm + +.macro free_stack Size + add sp, sp, (\Size) + .pad #-(\Size) +.endm + +.macro POP_CALLEE_SAVED_REGISTERS + pop_nonvol_reg "{r4-r11, lr}" +.endm + +.macro PUSH_CALLEE_SAVED_REGISTERS + push_nonvol_reg "{r4-r11, lr}" +.endm + +.macro push_register Reg + push \Reg +.endm + +.macro push_argument_register Reg + push_register \Reg +.endm + +.macro PUSH_ARGUMENT_REGISTERS + push {r0-r3} +.endm + +.macro pop_register Reg + pop \Reg +.endm + +.macro pop_argument_register Reg + pop_register \Reg +.endm + +.macro POP_ARGUMENT_REGISTERS + pop {r0-r3} +.endm + +.macro EMIT_BREAKPOINT + .inst.w 0xde01 +.endm + +.macro PROLOG_PUSH RegList + push_nonvol_reg "\RegList" +.endm + +.macro PROLOG_VPUSH RegList + vpush_nonvol_reg "\RegList" +.endm + +.macro PROLOG_STACK_SAVE Register + .setfp \Register, sp + mov \Register, sp +.endm + +.macro EPILOG_STACK_RESTORE Register + mov sp, \Register +.endm + +.macro EPILOG_POP RegList + pop_nonvol_reg "\RegList" +.endm + +.macro EPILOG_VPOP RegList + vpop_nonvol_reg "\RegList" +.endm + +.macro PROLOG_STACK_ALLOC Size + sub sp, sp, #\Size + .pad #\Size +.endm + +.macro EPILOG_STACK_FREE Size + add sp, sp, #\Size + .pad #-\Size +.endm + +//----------------------------------------------------------------------------- +// Macro used to check (in debug builds only) whether the stack is 64-bit aligned (a requirement before calling +// out into C++/OS code). Invoke this directly after your prolog (if the stack frame size is fixed) or directly +// before a call (if you have a frame pointer and a dynamic stack). A breakpoint will be invoked if the stack +// is misaligned. +// +.macro CHECK_STACK_ALIGNMENT + +#ifdef _DEBUG + push {r0} + add r0, sp, #4 + tst r0, #7 + pop {r0} + beq 0f + EMIT_BREAKPOINT +0: +#endif +.endm + +// Loads a 32bit constant into destination register +.macro MOV32 DestReg, Constant + movw \DestReg, #((\Constant) & 0xFFFF) + movt \DestReg, #((\Constant) >> 16) +.endm + +.macro EXPORT_POINTER_TO_ADDRESS Name + +1: + + .data + .align 4 +C_FUNC(\Name): + .word 1b + 1 // Add 1 to indicate thumb code + .global C_FUNC(\Name) + .text + +.endm + +// +// Macro used from unmanaged helpers called from managed code where the helper does not transition immediately +// into pre-emptive mode but may cause a GC and thus requires the stack is crawlable. This is typically the +// case for helpers that meddle in GC state (e.g. 
allocation helpers) where the code must remain in +// cooperative mode since it handles object references and internal GC state directly but a garbage collection +// may be inevitable. In these cases we need to be able to transition to pre-meptive mode deep within the +// unmanaged code but still be able to initialize the stack iterator at the first stack frame which may hold +// interesting GC references. In all our helper cases this corresponds to the most recent managed frame (e.g. +// the helper's caller). +// +// This macro builds a frame describing the current state of managed code. +// +// INVARIANTS +// - The macro assumes it defines the method prolog, it should typically be the first code in a method and +// certainly appear before any attempt to alter the stack pointer. +// - This macro uses trashReg (after its initial value has been saved in the frame) and upon exit trashReg +// will contain the address of transition frame. +// +.macro PUSH_COOP_PINVOKE_FRAME trashReg + + PROLOG_STACK_ALLOC 4 // Save space for caller's SP + PROLOG_PUSH "{r4-r10}" // Save preserved registers + PROLOG_STACK_ALLOC 8 // Save space for flags and Thread* + PROLOG_PUSH "{r11}" // Save caller's FP + PROLOG_PUSH "{r11,lr}" // Save caller's frame-chain pointer and PC + + // Compute SP value at entry to this method and save it in the last slot of the frame (slot #12). + add \trashReg, sp, #(13 * 4) + str \trashReg, [sp, #(12 * 4)] + + // Record the bitmask of saved registers in the frame (slot #4). + mov \trashReg, #DEFAULT_FRAME_SAVE_FLAGS + str \trashReg, [sp, #(4 * 4)] + + mov \trashReg, sp +.endm + +// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME +.macro POP_COOP_PINVOKE_FRAME + EPILOG_POP "{r11,lr}" // Restore caller's frame-chain pointer and PC (return address) + EPILOG_POP "{r11}" // Restore caller's FP + EPILOG_STACK_FREE 8 // Discard flags and Thread* + EPILOG_POP "{r4-r10}" // Restore preserved registers + EPILOG_STACK_FREE 4 // Discard caller's SP +.endm + +#ifdef _DEBUG +.macro GEN_ASSERT message, fileName, funcName + ldr r0, =\message + ldr r1, =\fileName + ldr r2, =\funcName + bl C_FUNC(NYI_Assert) +.endm +#endif + +// thumb with PIC version +.macro INLINE_GET_TLS_VAR Var + ldr r0, 2f +1: + add r0, pc, r0 + bl __tls_get_addr(PLT) + // push data at the end of text section + .pushsection .text, MAX_NUMBER_SUBSECTION_TEXT, "aM", %progbits, 4 + .balign 4 +2: + .4byte \Var(TLSGD) + (. - 1b - 4) + .popsection +.endm + +.macro INLINE_GETTHREAD + // Inlined version of call C_FUNC(RhpGetThread) + INLINE_GET_TLS_VAR tls_CurrentThread +.endm + +.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 + // + // Thread::Unhijack() + // + ldr \trashReg1, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz \trashReg1, 1f + + ldr \trashReg2, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str \trashReg1, [\trashReg2] + mov \trashReg1, #0 + str \trashReg1, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str \trashReg1, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +1: +.endm + +.macro EPILOG_BRANCH_REG reg + + bx \reg + +.endm diff --git a/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm64.inc b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm64.inc new file mode 100644 index 0000000000000..d031a77085e2c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm64.inc @@ -0,0 +1,139 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +.macro NESTED_ENTRY Name, Section, Handler + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler + .cfi_personality 0, C_FUNC(\Handler) // 0 == DW_EH_PE_absptr + .endif +.endm + +.macro NESTED_END Name, Section + LEAF_END \Name, \Section +.endm + +.macro PATCH_LABEL Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_ENTRY Name, Section + .global C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): + .cfi_startproc +.endm + +.macro LEAF_END Name, Section + .size \Name, .-\Name + .cfi_endproc +.endm + +.macro PREPARE_EXTERNAL_VAR Name, HelperReg + ldr \HelperReg, [pc, #C_FUNC(\Name)@GOTPCREL] +.endm + +.macro PROLOG_STACK_ALLOC Size + sub sp, sp, \Size + .cfi_adjust_cfa_offset \Size +.endm + +.macro EPILOG_STACK_FREE Size + add sp, sp, \Size +.endm + +.macro EPILOG_STACK_RESTORE + mov sp, fp +.endm + +.macro PROLOG_SAVE_REG reg, ofs + str \reg, [sp, \ofs] +.endm + +.macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs + stp \reg1, \reg2, [sp, \ofs] + .ifc \reg1, fp + mov fp, sp + .endif +.endm + +.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ofs + stp \reg1, \reg2, [sp, \ofs]! + .ifc \reg1, fp + mov fp, sp + .endif +.endm + +.macro EPILOG_RESTORE_REG reg, ofs + ldr \reg, [sp, \ofs] +.endm + +.macro EPILOG_RESTORE_REG_PAIR reg1, reg2, ofs + ldp \reg1, \reg2, [sp, \ofs] +.endm + +.macro EPILOG_RESTORE_REG_PAIR_INDEXED reg1, reg2, ofs + ldp \reg1, \reg2, [sp], \ofs +.endm + +.macro EPILOG_RETURN + ret +.endm + +.macro EMIT_BREAKPOINT + brk #0 +.endm + +//----------------------------------------------------------------------------- +// The Following sets of SAVE_*_REGISTERS expect the memory to be reserved and +// base address to be passed in $reg +// + +// Reserve 64 bytes of memory before calling SAVE_ARGUMENT_REGISTERS +.macro SAVE_ARGUMENT_REGISTERS reg, ofs + + stp x0, x1, [\reg, #(\ofs)] + stp x2, x3, [\reg, #(\ofs + 16)] + stp x4, x5, [\reg, #(\ofs + 32)] + stp x6, x7, [\reg, #(\ofs + 48)] + +.endm + +// Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS +.macro SAVE_FLOAT_ARGUMENT_REGISTERS reg, ofs + + stp d0, d1, [\reg, #(\ofs)] + stp d2, d3, [\reg, #(\ofs + 16)] + stp d4, d5, [\reg, #(\ofs + 32)] + stp d6, d7, [\reg, #(\ofs + 48)] + +.endm + +.macro RESTORE_ARGUMENT_REGISTERS reg, ofs + + ldp x0, x1, [\reg, #(\ofs)] + ldp x2, x3, [\reg, #(\ofs + 16)] + ldp x4, x5, [\reg, #(\ofs + 32)] + ldp x6, x7, [\reg, #(\ofs + 48)] + +.endm + +.macro RESTORE_FLOAT_ARGUMENT_REGISTERS reg, ofs + + ldp d0, d1, [\reg, #(\ofs)] + ldp d2, d3, [\reg, #(\ofs + 16)] + ldp d4, d5, [\reg, #(\ofs + 32)] + ldp d6, d7, [\reg, #(\ofs + 48)] + +.endm + +.macro EPILOG_BRANCH_REG reg + + br \reg + +.endm diff --git a/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosx86.inc b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosx86.inc new file mode 100644 index 0000000000000..08d57bb93be0c --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosx86.inc @@ -0,0 +1,117 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
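+
+// The macros below mirror the prolog/epilog and CFI bookkeeping conventions of the other
+// unixasmmacros*.inc files, specialized for x86. Purely as an illustrative sketch (the
+// helper name is hypothetical and not part of this change), an EBP-framed assembly helper
+// would typically pair them as follows:
+//
+//     LEAF_ENTRY RhpExampleHelper, _TEXT
+//         PROLOG_BEG              // push ebp; mov ebp, esp
+//         PROLOG_PUSH ebx         // preserve a callee-saved register
+//         PROLOG_END              // CFA is now tracked relative to ebp
+//         ...                     // helper body
+//         EPILOG_BEG
+//         EPILOG_POP ebx
+//         EPILOG_END              // pop ebp
+//         ret
+//     LEAF_END RhpExampleHelper, _TEXT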
+ +.macro NESTED_ENTRY Name, Section, Handler + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler + .cfi_personality 0x1b, C_FUNC(\Handler) // 0x1b == DW_EH_PE_pcrel | DW_EH_PE_sdata4 + .endif +.endm + +.macro NESTED_END Name, Section + LEAF_END \Name, \Section +.endm + +.macro LEAF_ENTRY Name, Section + .global C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): + .cfi_startproc +.endm + +.macro PATCH_LABEL Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_END Name, Section + .size \Name, .-\Name + .cfi_endproc +.endm + +.macro LEAF_END_MARKED Name, Section +C_FUNC(\Name\()_End): + .global C_FUNC(\Name\()_End) + LEAF_END \Name, \Section +.endm + +.macro PROLOG_BEG + push ebp + .cfi_def_cfa_offset 8 + .cfi_offset ebp, -8 + mov ebp, esp +.endm + +.macro PROLOG_PUSH Reg + push \Reg + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset \Reg, 0 +.endm + +.macro PROLOG_END + .cfi_def_cfa_register ebp + .cfi_def_cfa_offset 8 +.endm + +.macro EPILOG_BEG +.endm + +.macro EPILOG_POP Reg + pop \Reg + .cfi_restore \Reg +.endm + +.macro EPILOG_END + pop ebp +.endm + +.macro ESP_PROLOG_BEG +.endm + +.macro ESP_PROLOG_PUSH Reg + PROLOG_PUSH \Reg +.endm + +.macro ESP_PROLOG_ALLOC Size + sub esp, \Size + .cfi_adjust_cfa_offset \Size +.endm + +.macro ESP_PROLOG_END + .cfi_def_cfa_register esp +.endm + +.macro ESP_EPILOG_BEG +.endm + +.macro ESP_EPILOG_POP Reg + EPILOG_POP \Reg +.endm + +.macro ESP_EPILOG_FREE Size + add esp, \Size + .cfi_adjust_cfa_offset -\Size +.endm + +.macro ESP_EPILOG_END +.endm + +.macro PREPARE_EXTERNAL_VAR Name, Reg +.att_syntax + call 0f +0: + popl %\Reg +1: + addl $_GLOBAL_OFFSET_TABLE_ + (1b - 0b), %\Reg + movl C_FUNC(\Name)@GOT(%\Reg), %\Reg +.intel_syntax noprefix +.endm + +.macro CHECK_STACK_ALIGNMENT +#ifdef _DEBUG + test esp, 0Fh + je 0f + int3 +0: +#endif // _DEBUG +.endm diff --git a/src/coreclr/src/nativeaot/Runtime/wasm/AsmOffsetsCpu.h b/src/coreclr/src/nativeaot/Runtime/wasm/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..23976f026fed4 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/wasm/AsmOffsetsCpu.h @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. 
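+// A mismatched entry is intended to surface as a build-time failure rather than as silent
+// stack-walk corruption at runtime; the checking itself is driven by AsmOffsets.h.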
+// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(a4, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(4, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(8, ExInfo, m_exception) +PLAT_ASM_OFFSET(0c, ExInfo, m_kind) +PLAT_ASM_OFFSET(0d, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(10, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(14, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(a0, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_SIZEOF(8c, StackFrameIterator) +PLAT_ASM_OFFSET(08, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(0c, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(88, StackFrameIterator, m_OriginalControlPC) + +PLAT_ASM_SIZEOF(4, PAL_LIMITED_CONTEXT) +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_SIZEOF(0c, REGDISPLAY) +PLAT_ASM_OFFSET(0, REGDISPLAY, SP) diff --git a/src/coreclr/src/nativeaot/Runtime/windows/AsmOffsets.cpp b/src/coreclr/src/nativeaot/Runtime/windows/AsmOffsets.cpp new file mode 100644 index 0000000000000..8a852548d49eb --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/AsmOffsets.cpp @@ -0,0 +1,18 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#if defined(HOST_ARM) || defined(HOST_ARM64) + +#define HASH_DEFINE #define +#define PLAT_ASM_OFFSET(offset, cls, member) HASH_DEFINE OFFSETOF__##cls##__##member 0x##offset +#define PLAT_ASM_SIZEOF(size, cls ) HASH_DEFINE SIZEOF__##cls 0x##size +#define PLAT_ASM_CONST(constant, expr) HASH_DEFINE expr 0x##constant + +#else + +#define PLAT_ASM_OFFSET(offset, cls, member) OFFSETOF__##cls##__##member equ 0##offset##h +#define PLAT_ASM_SIZEOF(size, cls ) SIZEOF__##cls equ 0##size##h +#define PLAT_ASM_CONST(constant, expr) expr equ 0##constant##h + +#endif + +#include "AsmOffsets.h" diff --git a/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp new file mode 100644 index 0000000000000..c42dff7b72c4a --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -0,0 +1,850 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#include "common.h" + +#include + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "regdisplay.h" +#include "ICodeManager.h" +#include "CoffNativeCodeManager.h" +#include "varint.h" +#include "holder.h" + +#include "CommonMacros.inl" + +#define GCINFODECODER_NO_EE +#include "coreclr/gcinfodecoder.cpp" + +#define UBF_FUNC_KIND_MASK 0x03 +#define UBF_FUNC_KIND_ROOT 0x00 +#define UBF_FUNC_KIND_HANDLER 0x01 +#define UBF_FUNC_KIND_FILTER 0x02 + +#define UBF_FUNC_HAS_EHINFO 0x04 +#define UBF_FUNC_REVERSE_PINVOKE 0x08 +#define UBF_FUNC_HAS_ASSOCIATED_DATA 0x10 + +#ifdef TARGET_X86 +// +// x86 ABI does not define RUNTIME_FUNCTION. Define our own to allow unification between x86 and other platforms. 
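+// Note that the x86 UNWIND_INFO declared below carries only the function length; unlike
+// AMD64 there are no unwind codes, so GetUnwindDataBlob reports a fixed size for it.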
+// +typedef struct _RUNTIME_FUNCTION { + DWORD BeginAddress; + DWORD EndAddress; + DWORD UnwindData; +} RUNTIME_FUNCTION, *PRUNTIME_FUNCTION; + +typedef struct _KNONVOLATILE_CONTEXT_POINTERS { + + // The ordering of these fields should be aligned with that + // of corresponding fields in CONTEXT + // + // (See REGDISPLAY in Runtime/regdisp.h for details) + PDWORD Edi; + PDWORD Esi; + PDWORD Ebx; + PDWORD Edx; + PDWORD Ecx; + PDWORD Eax; + + PDWORD Ebp; + +} KNONVOLATILE_CONTEXT_POINTERS, *PKNONVOLATILE_CONTEXT_POINTERS; + +typedef struct _UNWIND_INFO { + ULONG FunctionLength; +} UNWIND_INFO, *PUNWIND_INFO; + +#elif defined(TARGET_AMD64) + +#define UNW_FLAG_NHANDLER 0x0 +#define UNW_FLAG_EHANDLER 0x1 +#define UNW_FLAG_UHANDLER 0x2 +#define UNW_FLAG_CHAININFO 0x4 + +// +// The following structures are defined in Windows x64 unwind info specification +// http://www.bing.com/search?q=msdn+Exception+Handling+x64 +// +typedef union _UNWIND_CODE { + struct { + uint8_t CodeOffset; + uint8_t UnwindOp : 4; + uint8_t OpInfo : 4; + }; + + uint16_t FrameOffset; +} UNWIND_CODE, *PUNWIND_CODE; + +typedef struct _UNWIND_INFO { + uint8_t Version : 3; + uint8_t Flags : 5; + uint8_t SizeOfProlog; + uint8_t CountOfUnwindCodes; + uint8_t FrameRegister : 4; + uint8_t FrameOffset : 4; + UNWIND_CODE UnwindCode[1]; +} UNWIND_INFO, *PUNWIND_INFO; + +#endif // TARGET_X86 + +typedef DPTR(struct _UNWIND_INFO) PTR_UNWIND_INFO; +typedef DPTR(union _UNWIND_CODE) PTR_UNWIND_CODE; + +static PTR_VOID GetUnwindDataBlob(TADDR moduleBase, PTR_RUNTIME_FUNCTION pRuntimeFunction, /* out */ size_t * pSize) +{ +#if defined(TARGET_AMD64) + PTR_UNWIND_INFO pUnwindInfo(dac_cast(moduleBase + pRuntimeFunction->UnwindInfoAddress)); + + size_t size = offsetof(UNWIND_INFO, UnwindCode) + sizeof(UNWIND_CODE) * pUnwindInfo->CountOfUnwindCodes; + + // Chained unwind info is not supported at this time + ASSERT((pUnwindInfo->Flags & UNW_FLAG_CHAININFO) == 0); + + if (pUnwindInfo->Flags & (UNW_FLAG_EHANDLER | UNW_FLAG_UHANDLER)) + { + // Personality routine + size = ALIGN_UP(size, sizeof(DWORD)) + sizeof(DWORD); + } + + *pSize = size; + + return pUnwindInfo; + +#elif defined(TARGET_X86) + + PTR_UNWIND_INFO pUnwindInfo(dac_cast(moduleBase + pRuntimeFunction->UnwindInfoAddress)); + + *pSize = sizeof(UNWIND_INFO); + + return pUnwindInfo; + +#elif defined(TARGET_ARM) + + // if this function uses packed unwind data then at least one of the two least significant bits + // will be non-zero. if this is the case then there will be no xdata record to enumerate. 
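+    // The first .xdata word packs the function length together with an epilog-scope count
+    // (bits 23-27) and an unwind-code word count (bits 28-31); when both counts are zero, an
+    // extension word holding wider counts follows. Bit 20 signals that a personality routine
+    // is present and bit 21 that epilog scopes are not listed individually, which is what the
+    // size computation below accounts for.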
+ ASSERT((pRuntimeFunction->UnwindData & 0x3) == 0); + + // compute the size of the unwind info + PTR_TADDR xdata = dac_cast(pRuntimeFunction->UnwindData + moduleBase); + + ULONG epilogScopes = 0; + ULONG unwindWords = 0; + ULONG size = 0; + + if ((xdata[0] >> 23) != 0) + { + size = 4; + epilogScopes = (xdata[0] >> 23) & 0x1f; + unwindWords = (xdata[0] >> 28) & 0x0f; + } + else + { + size = 8; + epilogScopes = xdata[1] & 0xffff; + unwindWords = (xdata[1] >> 16) & 0xff; + } + + if (!(xdata[0] & (1 << 21))) + size += 4 * epilogScopes; + + size += 4 * unwindWords; + + if ((xdata[0] & (1 << 20)) != 0) + { + // Personality routine + size += 4; + } + + *pSize = size; + return xdata; +#else + PORTABILITY_ASSERT("GetUnwindDataBlob"); + *pSize = 0; + return NULL; +#endif +} + + +CoffNativeCodeManager::CoffNativeCodeManager(TADDR moduleBase, + PTR_VOID pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, UInt32 nRuntimeFunctionTable, + PTR_PTR_VOID pClasslibFunctions, UInt32 nClasslibFunctions) + : m_moduleBase(moduleBase), + m_pvManagedCodeStartRange(pvManagedCodeStartRange), m_cbManagedCodeRange(cbManagedCodeRange), + m_pRuntimeFunctionTable(pRuntimeFunctionTable), m_nRuntimeFunctionTable(nRuntimeFunctionTable), + m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions) +{ +} + +CoffNativeCodeManager::~CoffNativeCodeManager() +{ +} + +static int LookupUnwindInfoForMethod(UInt32 relativePc, + PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, + int low, + int high) +{ +#ifdef TARGET_ARM + relativePc |= THUMB_CODE; +#endif + + // Binary search the RUNTIME_FUNCTION table + // Use linear search once we get down to a small number of elements + // to avoid Binary search overhead. + while (high - low > 10) + { + int middle = low + (high - low) / 2; + + PTR_RUNTIME_FUNCTION pFunctionEntry = pRuntimeFunctionTable + middle; + if (relativePc < pFunctionEntry->BeginAddress) + { + high = middle - 1; + } + else + { + low = middle; + } + } + + for (int i = low; i < high; i++) + { + PTR_RUNTIME_FUNCTION pNextFunctionEntry = pRuntimeFunctionTable + (i + 1); + if (relativePc < pNextFunctionEntry->BeginAddress) + { + high = i; + break; + } + } + + PTR_RUNTIME_FUNCTION pFunctionEntry = pRuntimeFunctionTable + high; + if (relativePc >= pFunctionEntry->BeginAddress) + { + return high; + } + + ASSERT_UNCONDITIONALLY("Invalid code address"); + return -1; +} + +struct CoffNativeMethodInfo +{ + PTR_RUNTIME_FUNCTION mainRuntimeFunction; + PTR_RUNTIME_FUNCTION runtimeFunction; + bool executionAborted; +}; + +// Ensure that CoffNativeMethodInfo fits into the space reserved by MethodInfo +static_assert(sizeof(CoffNativeMethodInfo) <= sizeof(MethodInfo), "CoffNativeMethodInfo too big"); + +bool CoffNativeCodeManager::FindMethodInfo(PTR_VOID ControlPC, + MethodInfo * pMethodInfoOut) +{ + // Stackwalker may call this with ControlPC that does not belong to this code manager + if (dac_cast(ControlPC) < dac_cast(m_pvManagedCodeStartRange) || + dac_cast(m_pvManagedCodeStartRange) + m_cbManagedCodeRange <= dac_cast(ControlPC)) + { + return false; + } + + CoffNativeMethodInfo * pMethodInfo = (CoffNativeMethodInfo *)pMethodInfoOut; + + TADDR relativePC = dac_cast(ControlPC) - m_moduleBase; + + int MethodIndex = LookupUnwindInfoForMethod((UInt32)relativePC, m_pRuntimeFunctionTable, + 0, m_nRuntimeFunctionTable - 1); + if (MethodIndex < 0) + return false; + + PTR_RUNTIME_FUNCTION pRuntimeFunction = m_pRuntimeFunctionTable + MethodIndex; + + pMethodInfo->runtimeFunction = 
pRuntimeFunction; + + // The runtime function could correspond to a funclet. We need to get to the + // runtime function of the main method. + for (;;) + { + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pRuntimeFunction, &unwindDataBlobSize); + + uint8_t unwindBlockFlags = *(dac_cast(pUnwindDataBlob) + unwindDataBlobSize); + if ((unwindBlockFlags & UBF_FUNC_KIND_MASK) == UBF_FUNC_KIND_ROOT) + break; + + pRuntimeFunction--; + } + + pMethodInfo->mainRuntimeFunction = pRuntimeFunction; + + pMethodInfo->executionAborted = false; + + return true; +} + +bool CoffNativeCodeManager::IsFunclet(MethodInfo * pMethInfo) +{ + CoffNativeMethodInfo * pMethodInfo = (CoffNativeMethodInfo *)pMethInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pMethodInfo->runtimeFunction, &unwindDataBlobSize); + + uint8_t unwindBlockFlags = *(dac_cast(pUnwindDataBlob) + unwindDataBlobSize); + + // A funclet will have an entry in funclet to main method map + return (unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT; +} + +bool CoffNativeCodeManager::IsFilter(MethodInfo * pMethInfo) +{ + CoffNativeMethodInfo * pMethodInfo = (CoffNativeMethodInfo *)pMethInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pMethodInfo->runtimeFunction, &unwindDataBlobSize); + + uint8_t unwindBlockFlags = *(dac_cast(pUnwindDataBlob) + unwindDataBlobSize); + + return (unwindBlockFlags & UBF_FUNC_KIND_MASK) == UBF_FUNC_KIND_FILTER; +} + +PTR_VOID CoffNativeCodeManager::GetFramePointer(MethodInfo * pMethInfo, + REGDISPLAY * pRegisterSet) +{ + CoffNativeMethodInfo * pMethodInfo = (CoffNativeMethodInfo *)pMethInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pMethodInfo->runtimeFunction, &unwindDataBlobSize); + + uint8_t unwindBlockFlags = *(dac_cast(pUnwindDataBlob) + unwindDataBlobSize); + + // Return frame pointer for methods with EH and funclets + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0 || (unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) + { + return (PTR_VOID)pRegisterSet->GetFP(); + } + + return NULL; +} + +void CoffNativeCodeManager::EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback) +{ + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->mainRuntimeFunction, &unwindDataBlobSize); + + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + TADDR methodStartAddress = m_moduleBase + pNativeMethodInfo->mainRuntimeFunction->BeginAddress; + UInt32 codeOffset = (UInt32)(dac_cast(safePointAddress) - methodStartAddress); + + GcInfoDecoder decoder( + GCInfoToken(p), + GcInfoDecoderFlags(DECODE_GC_LIFETIMES | DECODE_SECURITY_OBJECT | DECODE_VARARG), + codeOffset - 1 // TODO: Is this adjustment correct? 
+ ); + + ICodeManagerFlags flags = (ICodeManagerFlags)0; + if (pNativeMethodInfo->executionAborted) + flags = ICodeManagerFlags::ExecutionAborted; + if (IsFilter(pMethodInfo)) + flags = (ICodeManagerFlags)(flags | ICodeManagerFlags::NoReportUntracked); + + if (!decoder.EnumerateLiveSlots( + pRegisterSet, + false /* reportScratchSlots */, + flags, + hCallback->pCallback, + hCallback + )) + { + assert(false); + } +} + +UIntNative CoffNativeCodeManager::GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet) +{ +#if defined(TARGET_AMD64) + + // Return value + UIntNative upperBound; + CoffNativeMethodInfo* pNativeMethodInfo = (CoffNativeMethodInfo *) pMethodInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->runtimeFunction, &unwindDataBlobSize); + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + { + TADDR basePointer = dac_cast(pRegisterSet->GetFP()); + + // Get the method's GC info + GcInfoDecoder decoder(GCInfoToken(p), DECODE_REVERSE_PINVOKE_VAR); + UINT32 stackBasedRegister = decoder.GetStackBaseRegister(); + + if (stackBasedRegister == NO_STACK_BASE_REGISTER) + { + basePointer = dac_cast(pRegisterSet->GetSP()); + } + else + { + basePointer = dac_cast(pRegisterSet->GetFP()); + } + // Reverse PInvoke case. The embedded reverse PInvoke frame is guaranteed to reside above + // all outgoing arguments. + INT32 slot = decoder.GetReversePInvokeFrameStackSlot(); + upperBound = (UIntNative) dac_cast(basePointer + slot); + } + else + { + // Check for a pushed RBP value + if (GetFramePointer(pMethodInfo, pRegisterSet) == NULL) + { + // Unwind the current method context to get the caller's stack pointer + // and obtain the upper bound of the callee is the value just below the caller's return address on the stack + SIZE_T EstablisherFrame; + PVOID HandlerData; + CONTEXT context; + context.Rsp = pRegisterSet->GetSP(); + context.Rbp = pRegisterSet->GetFP(); + context.Rip = pRegisterSet->GetIP(); + + RtlVirtualUnwind(NULL, + dac_cast(m_moduleBase), + pRegisterSet->IP, + (PRUNTIME_FUNCTION)pNativeMethodInfo->runtimeFunction, + &context, + &HandlerData, + &EstablisherFrame, + NULL); + + upperBound = dac_cast(context.Rsp - sizeof (PVOID)); + } + else + { + // In amd64, it is guaranteed that if there is a pushed RBP + // value at the top of the frame it resides above all outgoing arguments. Unlike x86, + // the frame pointer generally points to a location that is separated from the pushed RBP + // value by an offset that is recorded in the info header. Recover the address of the + // pushed RBP value by subtracting this offset. 
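+            // (The offset subtracted here is the FrameOffset field of the UNWIND_INFO header
+            // returned by GetUnwindDataBlob above.)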
+ upperBound = (UIntNative) dac_cast(pRegisterSet->GetFP() - ((PTR_UNWIND_INFO) pUnwindDataBlob)->FrameOffset); + } + } + return upperBound; +#else + assert(false); + return false; +#endif +} + +bool CoffNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in/out + PTR_VOID * ppPreviousTransitionFrame) // out +{ + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->runtimeFunction, &unwindDataBlobSize); + + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + { + // Reverse PInvoke transition should be on the main function body only + assert(pNativeMethodInfo->mainRuntimeFunction == pNativeMethodInfo->runtimeFunction); + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + GcInfoDecoder decoder(GCInfoToken(p), DECODE_REVERSE_PINVOKE_VAR); + INT32 slot = decoder.GetReversePInvokeFrameStackSlot(); + assert(slot != NO_REVERSE_PINVOKE_FRAME); + + TADDR basePointer = NULL; + UINT32 stackBasedRegister = decoder.GetStackBaseRegister(); + if (stackBasedRegister == NO_STACK_BASE_REGISTER) + { + basePointer = dac_cast(pRegisterSet->GetSP()); + } + else + { + basePointer = dac_cast(pRegisterSet->GetFP()); + } + *ppPreviousTransitionFrame = *(void**)(basePointer + slot); + return true; + } + + *ppPreviousTransitionFrame = NULL; + + CONTEXT context; + KNONVOLATILE_CONTEXT_POINTERS contextPointers; + +#ifdef _DEBUG + memset(&context, 0xDD, sizeof(context)); + memset(&contextPointers, 0xDD, sizeof(contextPointers)); +#endif + +#ifdef TARGET_X86 + #define FOR_EACH_NONVOLATILE_REGISTER(F) \ + F(E, ax) F(E, cx) F(E, dx) F(E, bx) F(E, bp) F(E, si) F(E, di) + #define WORDPTR PDWORD +#else + #define FOR_EACH_NONVOLATILE_REGISTER(F) \ + F(R, ax) F(R, cx) F(R, dx) F(R, bx) F(R, bp) F(R, si) F(R, di) \ + F(R, 8) F(R, 9) F(R, 10) F(R, 11) F(R, 12) F(R, 13) F(R, 14) F(R, 15) + #define WORDPTR PDWORD64 +#endif + +#define REGDISPLAY_TO_CONTEXT(prefix, reg) \ + contextPointers.prefix####reg = (WORDPTR) pRegisterSet->pR##reg; \ + if (pRegisterSet->pR##reg != NULL) context.prefix##reg = *(pRegisterSet->pR##reg); + +#define CONTEXT_TO_REGDISPLAY(prefix, reg) \ + pRegisterSet->pR##reg = (PTR_UIntNative) contextPointers.prefix####reg; + + FOR_EACH_NONVOLATILE_REGISTER(REGDISPLAY_TO_CONTEXT); + +#ifdef TARGET_X86 + PORTABILITY_ASSERT("CoffNativeCodeManager::UnwindStackFrame"); +#else // TARGET_X86 + memcpy(&context.Xmm6, pRegisterSet->Xmm, sizeof(pRegisterSet->Xmm)); + + context.Rsp = pRegisterSet->SP; + context.Rip = pRegisterSet->IP; + + SIZE_T EstablisherFrame; + PVOID HandlerData; + + RtlVirtualUnwind(NULL, + dac_cast(m_moduleBase), + pRegisterSet->IP, + (PRUNTIME_FUNCTION)pNativeMethodInfo->runtimeFunction, + &context, + &HandlerData, + &EstablisherFrame, + &contextPointers); + + pRegisterSet->SP = context.Rsp; + pRegisterSet->IP = context.Rip; + + pRegisterSet->pIP = PTR_PCODE(pRegisterSet->SP - sizeof(TADDR)); + + memcpy(pRegisterSet->Xmm, &context.Xmm6, sizeof(pRegisterSet->Xmm)); +#endif // TARGET_X86 + + FOR_EACH_NONVOLATILE_REGISTER(CONTEXT_TO_REGDISPLAY); + +#undef FOR_EACH_NONVOLATILE_REGISTER +#undef REGDISPLAY_TO_CONTEXT +#undef CONTEXT_TO_REGDISPLAY + + return true; +} + +// Convert the return kind that was 
encoded by RyuJIT to the +// value that CoreRT runtime can understand and support. +GCRefKind GetGcRefKind(ReturnKind returnKind) +{ + static_assert((GCRefKind)ReturnKind::RT_Scalar == GCRK_Scalar, "ReturnKind::RT_Scalar does not match GCRK_Scalar"); + static_assert((GCRefKind)ReturnKind::RT_Object == GCRK_Object, "ReturnKind::RT_Object does not match GCRK_Object"); + static_assert((GCRefKind)ReturnKind::RT_ByRef == GCRK_Byref, "ReturnKind::RT_ByRef does not match GCRK_Byref"); + ASSERT((returnKind == RT_Scalar) || (returnKind == GCRK_Object) || (returnKind == GCRK_Byref)); + + return (GCRefKind)returnKind; +} + +bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in + PTR_PTR_VOID * ppvRetAddrLocation, // out + GCRefKind * pRetValueKind) // out +{ +#if defined(TARGET_AMD64) + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->runtimeFunction, &unwindDataBlobSize); + + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + // Check whether this is a funclet + if ((unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) + return false; + + // Skip hijacking a reverse-pinvoke method - it doesn't get us much because we already synchronize + // with the GC on the way back to native code. + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + return false; + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + // Decode the GC info for the current method to determine its return type + GcInfoDecoder decoder( + GCInfoToken(p), + GcInfoDecoderFlags(DECODE_RETURN_KIND), + 0 + ); + + GCRefKind gcRefKind = GetGcRefKind(decoder.GetReturnKind()); + + // Unwind the current method context to the caller's context to get its stack pointer + // and obtain the location of the return address on the stack + SIZE_T EstablisherFrame; + PVOID HandlerData; + CONTEXT context; + context.Rsp = pRegisterSet->GetSP(); + context.Rbp = pRegisterSet->GetFP(); + context.Rip = pRegisterSet->GetIP(); + + RtlVirtualUnwind(NULL, + dac_cast(m_moduleBase), + pRegisterSet->IP, + (PRUNTIME_FUNCTION)pNativeMethodInfo->runtimeFunction, + &context, + &HandlerData, + &EstablisherFrame, + NULL); + + *ppvRetAddrLocation = (PTR_PTR_VOID)(context.Rsp - sizeof (PVOID)); + *pRetValueKind = gcRefKind; + return true; +#else + return false; +#endif // defined(TARGET_AMD64) +} + +void CoffNativeCodeManager::UnsynchronizedHijackMethodLoops(MethodInfo * pMethodInfo) +{ + // @TODO: CORERT: UnsynchronizedHijackMethodLoops +} + +PTR_VOID CoffNativeCodeManager::RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC) +{ + // GCInfo decoder needs to know whether execution of the method is aborted + // while querying for gc-info. But ICodeManager::EnumGCRef() doesn't receive any + // flags from mrt. Call to this method is used as a cue to mark the method info + // as execution aborted. Note - if pMethodInfo was cached, this scheme would not work. + // + // If the method has EH, then JIT will make sure the method is fully interruptible + // and we will have GC-info available at the faulting address as well. 
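+    // (EnumGcRefs consumes this flag and passes ICodeManagerFlags::ExecutionAborted to the
+    // GC info decoder when enumerating live slots.)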
+ + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + pNativeMethodInfo->executionAborted = true; + + return controlPC; +} + +struct CoffEHEnumState +{ + PTR_UInt8 pMethodStartAddress; + PTR_UInt8 pEHInfo; + UInt32 uClause; + UInt32 nClauses; +}; + +// Ensure that CoffEHEnumState fits into the space reserved by EHEnumState +static_assert(sizeof(CoffEHEnumState) <= sizeof(EHEnumState), "CoffEHEnumState too big"); + +bool CoffNativeCodeManager::EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumStateOut) +{ + assert(pMethodInfo != NULL); + assert(pMethodStartAddress != NULL); + assert(pEHEnumStateOut != NULL); + + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + CoffEHEnumState * pEnumState = (CoffEHEnumState *)pEHEnumStateOut; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pNativeMethodInfo->mainRuntimeFunction, &unwindDataBlobSize); + + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + // return if there is no EH info associated with this method + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) == 0) + { + return false; + } + + *pMethodStartAddress = dac_cast(m_moduleBase + pNativeMethodInfo->mainRuntimeFunction->BeginAddress); + + pEnumState->pMethodStartAddress = dac_cast(*pMethodStartAddress); + pEnumState->pEHInfo = dac_cast(m_moduleBase + *dac_cast(p)); + pEnumState->uClause = 0; + pEnumState->nClauses = VarInt::ReadUnsigned(pEnumState->pEHInfo); + + return true; +} + +bool CoffNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pEHClauseOut) +{ + assert(pEHEnumState != NULL); + assert(pEHClauseOut != NULL); + + CoffEHEnumState * pEnumState = (CoffEHEnumState *)pEHEnumState; + if (pEnumState->uClause >= pEnumState->nClauses) + return false; + pEnumState->uClause++; + + pEHClauseOut->m_tryStartOffset = VarInt::ReadUnsigned(pEnumState->pEHInfo); + + UInt32 tryEndDeltaAndClauseKind = VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_clauseKind = (EHClauseKind)(tryEndDeltaAndClauseKind & 0x3); + pEHClauseOut->m_tryEndOffset = pEHClauseOut->m_tryStartOffset + (tryEndDeltaAndClauseKind >> 2); + + // For each clause, we have up to 4 integers: + // 1) try start offset + // 2) (try length << 2) | clauseKind + // 3) if (typed || fault || filter) { handler start offset } + // 4a) if (typed) { type RVA } + // 4b) if (filter) { filter start offset } + // + // The first two integers have already been decoded + + switch (pEHClauseOut->m_clauseKind) + { + case EH_CLAUSE_TYPED: + pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + + // Read target type + { + // @TODO: CORERT: Compress EHInfo using type table index scheme + // https://github.com/dotnet/corert/issues/972 + UInt32 typeRVA = *((PTR_UInt32&)pEnumState->pEHInfo)++; + pEHClauseOut->m_pTargetType = dac_cast(m_moduleBase + typeRVA); + } + break; + case EH_CLAUSE_FAULT: + pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + break; + case EH_CLAUSE_FILTER: + pEHClauseOut->m_handlerAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + pEHClauseOut->m_filterAddress = pEnumState->pMethodStartAddress + VarInt::ReadUnsigned(pEnumState->pEHInfo); + break; + default: + 
UNREACHABLE_MSG("unexpected EHClauseKind"); + } + + return true; +} + +PTR_VOID CoffNativeCodeManager::GetOsModuleHandle() +{ + return dac_cast(m_moduleBase); +} + +PTR_VOID CoffNativeCodeManager::GetMethodStartAddress(MethodInfo * pMethodInfo) +{ + CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; + return dac_cast(m_moduleBase + pNativeMethodInfo->mainRuntimeFunction->BeginAddress); +} + +void * CoffNativeCodeManager::GetClasslibFunction(ClasslibFunctionId functionId) +{ + uint32_t id = (uint32_t)functionId; + + if (id >= m_nClasslibFunctions) + return nullptr; + + return m_pClasslibFunctions[id]; +} + +PTR_VOID CoffNativeCodeManager::GetAssociatedData(PTR_VOID ControlPC) +{ + if (dac_cast(ControlPC) < dac_cast(m_pvManagedCodeStartRange) || + dac_cast(m_pvManagedCodeStartRange) + m_cbManagedCodeRange <= dac_cast(ControlPC)) + { + return NULL; + } + + TADDR relativePC = dac_cast(ControlPC) - m_moduleBase; + + int MethodIndex = LookupUnwindInfoForMethod((UInt32)relativePC, m_pRuntimeFunctionTable, 0, m_nRuntimeFunctionTable - 1); + if (MethodIndex < 0) + return NULL; + + PTR_RUNTIME_FUNCTION pRuntimeFunction = m_pRuntimeFunctionTable + MethodIndex; + + size_t unwindDataBlobSize; + PTR_VOID pUnwindDataBlob = GetUnwindDataBlob(m_moduleBase, pRuntimeFunction, &unwindDataBlobSize); + + PTR_UInt8 p = dac_cast(pUnwindDataBlob) + unwindDataBlobSize; + + uint8_t unwindBlockFlags = *p++; + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) == 0) + return NULL; + + UInt32 dataRVA = *(UInt32*)p; + return dac_cast(m_moduleBase + dataRVA); +} + +extern "C" bool __stdcall RegisterCodeManager(ICodeManager * pCodeManager, PTR_VOID pvStartRange, UInt32 cbRange); +extern "C" void __stdcall UnregisterCodeManager(ICodeManager * pCodeManager); +extern "C" bool __stdcall RegisterUnboxingStubs(PTR_VOID pvStartRange, UInt32 cbRange); + +extern "C" +bool RhRegisterOSModule(void * pModule, + void * pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + void * pvUnboxingStubsStartRange, UInt32 cbUnboxingStubsRange, + void ** pClasslibFunctions, UInt32 nClasslibFunctions) +{ + PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)pModule; + PIMAGE_NT_HEADERS pNTHeaders = (PIMAGE_NT_HEADERS)((TADDR)pModule + pDosHeader->e_lfanew); + + IMAGE_DATA_DIRECTORY * pRuntimeFunctions = &(pNTHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXCEPTION]); + + NewHolder pCoffNativeCodeManager = new (nothrow) CoffNativeCodeManager((TADDR)pModule, + pvManagedCodeStartRange, cbManagedCodeRange, + dac_cast((TADDR)pModule + pRuntimeFunctions->VirtualAddress), + pRuntimeFunctions->Size / sizeof(RUNTIME_FUNCTION), + pClasslibFunctions, nClasslibFunctions); + + if (pCoffNativeCodeManager == nullptr) + return false; + + if (!RegisterCodeManager(pCoffNativeCodeManager, pvManagedCodeStartRange, cbManagedCodeRange)) + return false; + + if (!RegisterUnboxingStubs(pvUnboxingStubsStartRange, cbUnboxingStubsRange)) + { + UnregisterCodeManager(pCoffNativeCodeManager); + return false; + } + + pCoffNativeCodeManager.SuppressRelease(); + + return true; +} diff --git a/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.h new file mode 100644 index 0000000000000..8777bdfc7b750 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -0,0 +1,105 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
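+
+// CoffNativeCodeManager implements ICodeManager on top of the PE/COFF RUNTIME_FUNCTION
+// table of an AOT-compiled module: method lookup, stack unwinding, GC reference enumeration
+// and EH clause enumeration all resolve through that table. One instance is created per
+// registered OS module by RhRegisterOSModule (see CoffNativeCodeManager.cpp).
+//
+// Illustrative call flow only (the local names are hypothetical):
+//
+//     MethodInfo mi;
+//     if (pCodeManager->FindMethodInfo(controlPC, &mi))
+//     {
+//         // e.g. report GC references live at a safe point in that method
+//         pCodeManager->EnumGcRefs(&mi, safePointAddress, pRegisterSet, &gcEnumContext);
+//     }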
+ +#pragma once + +#if defined(TARGET_AMD64) || defined(TARGET_X86) +struct T_RUNTIME_FUNCTION { + uint32_t BeginAddress; + uint32_t EndAddress; + uint32_t UnwindInfoAddress; +}; +#elif defined(TARGET_ARM) +struct T_RUNTIME_FUNCTION { + uint32_t BeginAddress; + uint32_t UnwindData; +}; +#elif defined(TARGET_ARM64) +struct T_RUNTIME_FUNCTION { + uint32_t BeginAddress; + union { + uint32_t UnwindData; + struct { + uint32_t Flag : 2; + uint32_t FunctionLength : 11; + uint32_t RegF : 3; + uint32_t RegI : 4; + uint32_t H : 1; + uint32_t CR : 2; + uint32_t FrameSize : 9; + } PackedUnwindData; + }; +}; +#else +#error unexpected target architecture +#endif + +typedef DPTR(T_RUNTIME_FUNCTION) PTR_RUNTIME_FUNCTION; + +class CoffNativeCodeManager : public ICodeManager +{ + TADDR m_moduleBase; + + PTR_VOID m_pvManagedCodeStartRange; + UInt32 m_cbManagedCodeRange; + + PTR_RUNTIME_FUNCTION m_pRuntimeFunctionTable; + UInt32 m_nRuntimeFunctionTable; + + PTR_PTR_VOID m_pClasslibFunctions; + UInt32 m_nClasslibFunctions; + +public: + CoffNativeCodeManager(TADDR moduleBase, + PTR_VOID pvManagedCodeStartRange, UInt32 cbManagedCodeRange, + PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, UInt32 nRuntimeFunctionTable, + PTR_PTR_VOID pClasslibFunctions, UInt32 nClasslibFunctions); + ~CoffNativeCodeManager(); + + // + // Code manager methods + // + + bool FindMethodInfo(PTR_VOID ControlPC, + MethodInfo * pMethodInfoOut); + + bool IsFunclet(MethodInfo * pMethodInfo); + + bool IsFilter(MethodInfo * pMethodInfo); + + PTR_VOID GetFramePointer(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet); + + void EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback); + + bool UnwindStackFrame(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in/out + PTR_VOID * ppPreviousTransitionFrame); // out + + UIntNative GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet); + + bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, + REGDISPLAY * pRegisterSet, // in + PTR_PTR_VOID * ppvRetAddrLocation, // out + GCRefKind * pRetValueKind); // out + + void UnsynchronizedHijackMethodLoops(MethodInfo * pMethodInfo); + + PTR_VOID RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC); + + bool EHEnumInit(MethodInfo * pMethodInfo, PTR_VOID * pMethodStartAddress, EHEnumState * pEHEnumState); + + bool EHEnumNext(EHEnumState * pEHEnumState, EHClause * pEHClause); + + PTR_VOID GetMethodStartAddress(MethodInfo * pMethodInfo); + + void * GetClasslibFunction(ClasslibFunctionId functionId); + + PTR_VOID GetAssociatedData(PTR_VOID ControlPC); + + PTR_VOID GetOsModuleHandle(); +}; diff --git a/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkCommon.cpp b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkCommon.cpp new file mode 100644 index 0000000000000..de7caaa9abe5e --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkCommon.cpp @@ -0,0 +1,405 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// Implementation of the portions of the Redhawk Platform Abstraction Layer (PAL) library that are common among +// multiple PAL variants. +// +// Note that in general we don't want to assume that Windows and Redhawk global definitions can co-exist. +// Since this code must include Windows headers to do its job we can't therefore safely include general +// Redhawk header files. 
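// The helpers defined below cover module bounds and PDB discovery (PalGetModuleBounds,
// PalGetPDBInfo), the process CPU count, whole-file reads, the maximum stack bounds of
// the current thread, module file-name lookup via the PEB, and the 64-bit tick count.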
+// + +#include +#include +#include +#include +#include "CommonTypes.h" +#include "daccess.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include +#include "CommonMacros.h" +#include "rhassert.h" + + +#define REDHAWK_PALEXPORT extern "C" +#define REDHAWK_PALAPI __stdcall + + +// Given the OS handle of a loaded module, compute the upper and lower virtual address bounds (inclusive). +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalGetModuleBounds(HANDLE hOsHandle, _Out_ UInt8 ** ppLowerBound, _Out_ UInt8 ** ppUpperBound) +{ + BYTE *pbModule = (BYTE*)hOsHandle; + DWORD cbModule; + + IMAGE_NT_HEADERS *pNtHeaders = (IMAGE_NT_HEADERS*)(pbModule + ((IMAGE_DOS_HEADER*)hOsHandle)->e_lfanew); + if (pNtHeaders->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) + cbModule = ((IMAGE_OPTIONAL_HEADER32*)&pNtHeaders->OptionalHeader)->SizeOfImage; + else + cbModule = ((IMAGE_OPTIONAL_HEADER64*)&pNtHeaders->OptionalHeader)->SizeOfImage; + + *ppLowerBound = pbModule; + *ppUpperBound = pbModule + cbModule - 1; +} + +// Reads through the PE header of the specified module, and returns +// the module's matching PDB's signature GUID, age, and build path by +// fishing them out of the last IMAGE_DEBUG_DIRECTORY of type +// IMAGE_DEBUG_TYPE_CODEVIEW. Used when sending the ModuleLoad event +// to help profilers find matching PDBs for loaded modules. +// +// Arguments: +// +// [in] hOsHandle - OS Handle for module from which to get PDB info +// [out] pGuidSignature - PDB's signature GUID to be placed here +// [out] pdwAge - PDB's age to be placed here +// [out] wszPath - PDB's build path to be placed here +// [in] cchPath - Number of wide characters allocated in wszPath, including NULL terminator +// +// This is a simplification of similar code in desktop CLR's GetCodeViewInfo +// in eventtrace.cpp. +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalGetPDBInfo(HANDLE hOsHandle, _Out_ GUID * pGuidSignature, _Out_ UInt32 * pdwAge, _Out_writes_z_(cchPath) WCHAR * wszPath, Int32 cchPath) +{ + // Zero-init [out]-params + ZeroMemory(pGuidSignature, sizeof(*pGuidSignature)); + *pdwAge = 0; + if (cchPath <= 0) + return; + wszPath[0] = L'\0'; + + BYTE *pbModule = (BYTE*)hOsHandle; + + IMAGE_NT_HEADERS const * pNtHeaders = (IMAGE_NT_HEADERS*)(pbModule + ((IMAGE_DOS_HEADER*)hOsHandle)->e_lfanew); + IMAGE_DATA_DIRECTORY const * rgDataDirectory = NULL; + if (pNtHeaders->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) + rgDataDirectory = ((IMAGE_OPTIONAL_HEADER32 const *)&pNtHeaders->OptionalHeader)->DataDirectory; + else + rgDataDirectory = ((IMAGE_OPTIONAL_HEADER64 const *)&pNtHeaders->OptionalHeader)->DataDirectory; + + IMAGE_DATA_DIRECTORY const * pDebugDataDirectory = &rgDataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG]; + + // In Redhawk, modules are loaded as MAPPED, so we don't have to worry about dealing + // with FLAT files (with padding missing), so header addresses can be used as is + IMAGE_DEBUG_DIRECTORY const *rgDebugEntries = (IMAGE_DEBUG_DIRECTORY const *) (pbModule + pDebugDataDirectory->VirtualAddress); + DWORD cbDebugEntries = pDebugDataDirectory->Size; + if (cbDebugEntries < sizeof(IMAGE_DEBUG_DIRECTORY)) + return; + + // Since rgDebugEntries is an array of IMAGE_DEBUG_DIRECTORYs, cbDebugEntries + // should be a multiple of sizeof(IMAGE_DEBUG_DIRECTORY). 
+ if (cbDebugEntries % sizeof(IMAGE_DEBUG_DIRECTORY) != 0) + return; + + // CodeView RSDS debug information -> PDB 7.00 + struct CV_INFO_PDB70 + { + DWORD magic; + GUID signature; // unique identifier + DWORD age; // an always-incrementing value + _Field_z_ char path[MAX_PATH]; // zero terminated string with the name of the PDB file + }; + + // Temporary storage for a CV_INFO_PDB70 and its size (which could be less than + // sizeof(CV_INFO_PDB70); see below). + struct PdbInfo + { + CV_INFO_PDB70 * m_pPdb70; + ULONG m_cbPdb70; + }; + + // Grab module bounds so we can do some rough sanity checking before we follow any + // RVAs + UInt8 * pbModuleLowerBound = NULL; + UInt8 * pbModuleUpperBound = NULL; + PalGetModuleBounds(hOsHandle, &pbModuleLowerBound, &pbModuleUpperBound); + + // Iterate through all debug directory entries. The convention is that debuggers & + // profilers typically just use the very last IMAGE_DEBUG_TYPE_CODEVIEW entry. Treat raw + // bytes we read as untrusted. + PdbInfo pdbInfoLast = {0}; + int cEntries = cbDebugEntries / sizeof(IMAGE_DEBUG_DIRECTORY); + for (int i = 0; i < cEntries; i++) + { + if ((UInt8 *)(&rgDebugEntries[i]) + sizeof(rgDebugEntries[i]) >= pbModuleUpperBound) + { + // Bogus pointer + return; + } + + if (rgDebugEntries[i].Type != IMAGE_DEBUG_TYPE_CODEVIEW) + continue; + + // Get raw data pointed to by this IMAGE_DEBUG_DIRECTORY + + // AddressOfRawData is generally set properly for Redhawk modules, so we don't + // have to worry about using PointerToRawData and converting it to an RVA + if (rgDebugEntries[i].AddressOfRawData == NULL) + continue; + + DWORD rvaOfRawData = rgDebugEntries[i].AddressOfRawData; + ULONG cbDebugData = rgDebugEntries[i].SizeOfData; + if (cbDebugData < size_t(&((CV_INFO_PDB70*)0)->magic) + sizeof(((CV_INFO_PDB70*)0)->magic)) + { + // raw data too small to contain magic number at expected spot, so its format + // is not recognizable. Skip + continue; + } + + // Verify the magic number is as expected + const DWORD CV_SIGNATURE_RSDS = 0x53445352; + CV_INFO_PDB70 * pPdb70 = (CV_INFO_PDB70 *) (pbModule + rvaOfRawData); + if ((UInt8 *)(pPdb70) + cbDebugData >= pbModuleUpperBound) + { + // Bogus pointer + return; + } + + if (pPdb70->magic != CV_SIGNATURE_RSDS) + { + // Unrecognized magic number. Skip + continue; + } + + // From this point forward, the format should adhere to the expected layout of + // CV_INFO_PDB70. If we find otherwise, then assume the IMAGE_DEBUG_DIRECTORY is + // outright corrupt. + + // Verify sane size of raw data + if (cbDebugData > sizeof(CV_INFO_PDB70)) + return; + + // cbDebugData actually can be < sizeof(CV_INFO_PDB70), since the "path" field + // can be truncated to its actual data length (i.e., fewer than MAX_PATH chars + // may be present in the PE file). In some cases, though, cbDebugData will + // include all MAX_PATH chars even though path gets null-terminated well before + // the MAX_PATH limit. + + // Gotta have at least one byte of the path + if (cbDebugData < offsetof(CV_INFO_PDB70, path) + sizeof(char)) + return; + + // How much space is available for the path? 
+ size_t cchPathMaxIncludingNullTerminator = (cbDebugData - offsetof(CV_INFO_PDB70, path)) / sizeof(char); + ASSERT(cchPathMaxIncludingNullTerminator >= 1); // Guaranteed above + + // Verify path string fits inside the declared size + size_t cchPathActualExcludingNullTerminator = strnlen_s(pPdb70->path, cchPathMaxIncludingNullTerminator); + if (cchPathActualExcludingNullTerminator == cchPathMaxIncludingNullTerminator) + { + // This is how strnlen indicates failure--it couldn't find the null + // terminator within the buffer size specified + return; + } + + // Looks valid. Remember it. + pdbInfoLast.m_pPdb70 = pPdb70; + pdbInfoLast.m_cbPdb70 = cbDebugData; + } + + // Take the last IMAGE_DEBUG_TYPE_CODEVIEW entry we saw, and return it to the caller + if (pdbInfoLast.m_pPdb70 != NULL) + { + memcpy(pGuidSignature, &pdbInfoLast.m_pPdb70->signature, sizeof(GUID)); + *pdwAge = pdbInfoLast.m_pPdb70->age; + + // Convert build path from ANSI to UNICODE + errno_t ret; + size_t cchConverted; + ret = mbstowcs_s( + &cchConverted, + wszPath, + cchPath, + pdbInfoLast.m_pPdb70->path, + _countof(pdbInfoLast.m_pPdb70->path) - 1); + if ((ret != 0) && (ret != STRUNCATE)) + { + // PDB path isn't essential. An empty string will do if we hit an error. + ASSERT(cchPath > 0); // Guaranteed at top of function + wszPath[0] = L'\0'; + } + } +} + +REDHAWK_PALEXPORT Int32 REDHAWK_PALAPI PalGetProcessCpuCount() +{ + static int CpuCount = 0; + + if (CpuCount != 0) + return CpuCount; + else + { + // The concept of process CPU affinity is going away and so CoreSystem obsoletes the APIs used to + // fetch this information. Instead we'll just return total cpu count. + SYSTEM_INFO sysInfo; +#ifndef APP_LOCAL_RUNTIME + ::GetSystemInfo(&sysInfo); +#else + ::GetNativeSystemInfo(&sysInfo); +#endif + CpuCount = sysInfo.dwNumberOfProcessors; + return sysInfo.dwNumberOfProcessors; + } +} + +//Reads the entire contents of the file into the specified buffer, buff +//returns the number of bytes read if the file is successfully read +//returns 0 if the file is not found, size is greater than maxBytesToRead or the file couldn't be opened or read +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalReadFileContents(_In_z_ const TCHAR* fileName, _Out_writes_all_(maxBytesToRead) char* buff, _In_ UInt32 maxBytesToRead) +{ + WIN32_FILE_ATTRIBUTE_DATA attrData; + + BOOL getAttrSuccess = GetFileAttributesExW(fileName, GetFileExInfoStandard, &attrData); + + //if we weren't able to get the file attributes, or the file is larger than maxBytesToRead, or the file size is zero + if ((!getAttrSuccess) || (attrData.nFileSizeHigh != 0) || (attrData.nFileSizeLow > (DWORD)maxBytesToRead) || (attrData.nFileSizeLow == 0)) + { + return 0; + } + + HANDLE hFile = PalCreateFileW(fileName, GENERIC_READ, FILE_SHARE_DELETE | FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) + { + return 0; + } + + UInt32 bytesRead; + + BOOL readSuccess = ReadFile(hFile, buff, (DWORD)maxBytesToRead, (DWORD*)&bytesRead, NULL); + + CloseHandle(hFile); + + if (!readSuccess) + { + return 0; + } + + return bytesRead; +} + + +// Retrieves the entire range of memory dedicated to the calling thread's stack. This does +// not get the current dynamic bounds of the stack, which can be significantly smaller than +// the maximum bounds. 
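// Note that the committed portion of the stack grows on demand (downward, via guard
// pages) from the high address returned here toward the low address, so only part of
// this range is typically mapped at any given time.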
+REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalGetMaximumStackBounds(_Out_ void** ppStackLowOut, _Out_ void** ppStackHighOut) +{ + // VirtualQuery on the address of a local variable to get the allocation + // base of the stack. Then use the StackBase field in the TEB to give + // the highest address of the stack region. + MEMORY_BASIC_INFORMATION mbi = { 0 }; + SIZE_T cb = VirtualQuery(&mbi, &mbi, sizeof(mbi)); + if (cb != sizeof(mbi)) + return false; + + NT_TIB* pTib = (NT_TIB*)NtCurrentTeb(); + *ppStackHighOut = pTib->StackBase; // stack base is the highest address + *ppStackLowOut = mbi.AllocationBase; // allocation base is the lowest address + return true; +} + +#if !defined(_INC_WINDOWS) || defined(APP_LOCAL_RUNTIME) + +typedef struct _UNICODE_STRING { + USHORT Length; + USHORT MaximumLength; + PWSTR Buffer; +} UNICODE_STRING; +typedef UNICODE_STRING *PUNICODE_STRING; +typedef const UNICODE_STRING *PCUNICODE_STRING; + +typedef struct _PEB_LDR_DATA { + BYTE Reserved1[8]; + PVOID Reserved2[3]; + LIST_ENTRY InMemoryOrderModuleList; +} PEB_LDR_DATA, *PPEB_LDR_DATA; + +typedef struct _LDR_DATA_TABLE_ENTRY { + PVOID Reserved1[2]; + LIST_ENTRY InMemoryOrderLinks; + PVOID Reserved2[2]; + PVOID DllBase; + PVOID Reserved3[2]; + UNICODE_STRING FullDllName; + BYTE Reserved4[8]; + PVOID Reserved5[3]; + union { + ULONG CheckSum; + PVOID Reserved6; + } DUMMYUNIONNAME; + ULONG TimeDateStamp; +} LDR_DATA_TABLE_ENTRY, *PLDR_DATA_TABLE_ENTRY; + +typedef struct _PEB { + BYTE Reserved1[2]; + BYTE BeingDebugged; + BYTE Reserved2[1]; + PVOID Reserved3[2]; + PPEB_LDR_DATA Ldr; + PVOID /*PRTL_USER_PROCESS_PARAMETERS*/ ProcessParameters; + PVOID Reserved4[3]; + PVOID AtlThunkSListPtr; + PVOID Reserved5; + ULONG Reserved6; + PVOID Reserved7; + ULONG Reserved8; + ULONG AtlThunkSListPtr32; + PVOID Reserved9[45]; + BYTE Reserved10[96]; + PVOID /*PPS_POST_PROCESS_INIT_ROUTINE*/ PostProcessInitRoutine; + BYTE Reserved11[128]; + PVOID Reserved12[1]; + ULONG SessionId; +} PEB, *PPEB; + +typedef struct _TEB { + PVOID Reserved1[12]; + PPEB ProcessEnvironmentBlock; + PVOID Reserved2[399]; + BYTE Reserved3[1952]; + PVOID TlsSlots[64]; + BYTE Reserved4[8]; + PVOID Reserved5[26]; + PVOID ReservedForOle; // Windows 2000 only + PVOID Reserved6[4]; + PVOID TlsExpansionSlots; +} TEB, *PTEB; + +#endif // !defined(_INC_WINDOWS) || defined(APP_LOCAL_RUNTIME) + +// retrieves the full path to the specified module, if moduleBase is NULL retreieves the full path to the +// executable module of the current process. +// +// Return value: number of characters in name string +// +//NOTE: This implementation exists because calling GetModuleFileName is not wack compliant. if we later decide +// that the framework package containing mrt100_app no longer needs to be wack compliant, this should be +// removed and the windows implementation of GetModuleFileName should be substitued on windows. 
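// The implementation below walks the loader's InMemoryOrderModuleList directly off the
// TEB/PEB. UNICODE_STRING::Length is a byte count, so the character count returned is
// Length divided by sizeof(WCHAR), i.e. by 2.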
+REDHAWK_PALEXPORT Int32 PalGetModuleFileName(_Out_ const TCHAR** pModuleNameOut, HANDLE moduleBase) +{ + TEB* pTEB = NtCurrentTeb(); + LIST_ENTRY* pStartLink = &(pTEB->ProcessEnvironmentBlock->Ldr->InMemoryOrderModuleList); + LIST_ENTRY* pCurLink = pStartLink->Flink; + + do + { + LDR_DATA_TABLE_ENTRY* pEntry = CONTAINING_RECORD(pCurLink, LDR_DATA_TABLE_ENTRY, InMemoryOrderLinks); + + //null moduleBase will result in the first module being returned + //since the module list is ordered this is the executable module of the current process + if ((pEntry->DllBase == moduleBase) || (moduleBase == NULL)) + { + *pModuleNameOut = pEntry->FullDllName.Buffer; + return pEntry->FullDllName.Length / 2; + } + pCurLink = pCurLink->Flink; + } + while (pCurLink != pStartLink); + + *pModuleNameOut = NULL; + return 0; +} + +REDHAWK_PALEXPORT UInt64 __cdecl PalGetTickCount64() +{ + return GetTickCount64(); +} diff --git a/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkInline.h b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkInline.h new file mode 100644 index 0000000000000..cd9535f4731d0 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkInline.h @@ -0,0 +1,157 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Implementation of Redhawk PAL inline functions + +EXTERN_C long __cdecl _InterlockedIncrement(long volatile *); +#pragma intrinsic(_InterlockedIncrement) +FORCEINLINE Int32 PalInterlockedIncrement(_Inout_ _Interlocked_operand_ Int32 volatile *pDst) +{ + return _InterlockedIncrement((long volatile *)pDst); +} + +EXTERN_C long __cdecl _InterlockedDecrement(long volatile *); +#pragma intrinsic(_InterlockedDecrement) +FORCEINLINE Int32 PalInterlockedDecrement(_Inout_ _Interlocked_operand_ Int32 volatile *pDst) +{ + return _InterlockedDecrement((long volatile *)pDst); +} + +EXTERN_C long _InterlockedOr(long volatile *, long); +#pragma intrinsic(_InterlockedOr) +FORCEINLINE UInt32 PalInterlockedOr(_Inout_ _Interlocked_operand_ UInt32 volatile *pDst, UInt32 iValue) +{ + return _InterlockedOr((long volatile *)pDst, iValue); +} + +EXTERN_C long _InterlockedAnd(long volatile *, long); +#pragma intrinsic(_InterlockedAnd) +FORCEINLINE UInt32 PalInterlockedAnd(_Inout_ _Interlocked_operand_ UInt32 volatile *pDst, UInt32 iValue) +{ + return _InterlockedAnd((long volatile *)pDst, iValue); +} + +EXTERN_C long __PN__MACHINECALL_CDECL_OR_DEFAULT _InterlockedExchange(long volatile *, long); +#pragma intrinsic(_InterlockedExchange) +FORCEINLINE Int32 PalInterlockedExchange(_Inout_ _Interlocked_operand_ Int32 volatile *pDst, Int32 iValue) +{ + return _InterlockedExchange((long volatile *)pDst, iValue); +} + +EXTERN_C long __PN__MACHINECALL_CDECL_OR_DEFAULT _InterlockedCompareExchange(long volatile *, long, long); +#pragma intrinsic(_InterlockedCompareExchange) +FORCEINLINE Int32 PalInterlockedCompareExchange(_Inout_ _Interlocked_operand_ Int32 volatile *pDst, Int32 iValue, Int32 iComparand) +{ + return _InterlockedCompareExchange((long volatile *)pDst, iValue, iComparand); +} + +EXTERN_C Int64 _InterlockedCompareExchange64(Int64 volatile *, Int64, Int64); +#pragma intrinsic(_InterlockedCompareExchange64) +FORCEINLINE Int64 PalInterlockedCompareExchange64(_Inout_ _Interlocked_operand_ Int64 volatile *pDst, Int64 iValue, Int64 iComparand) +{ + return _InterlockedCompareExchange64(pDst, iValue, iComparand); +} + +#if defined(HOST_AMD64) || defined(HOST_ARM64) +EXTERN_C UInt8 
_InterlockedCompareExchange128(Int64 volatile *, Int64, Int64, Int64 *); +#pragma intrinsic(_InterlockedCompareExchange128) +FORCEINLINE UInt8 PalInterlockedCompareExchange128(_Inout_ _Interlocked_operand_ Int64 volatile *pDst, Int64 iValueHigh, Int64 iValueLow, Int64 *pComparandAndResult) +{ + return _InterlockedCompareExchange128(pDst, iValueHigh, iValueLow, pComparandAndResult); +} +#endif // HOST_AMD64 + +#ifdef HOST_64BIT + +EXTERN_C void * _InterlockedExchangePointer(void * volatile *, void *); +#pragma intrinsic(_InterlockedExchangePointer) +FORCEINLINE void * PalInterlockedExchangePointer(_Inout_ _Interlocked_operand_ void * volatile *pDst, _In_ void *pValue) +{ + return _InterlockedExchangePointer((void * volatile *)pDst, pValue); +} + +EXTERN_C void * _InterlockedCompareExchangePointer(void * volatile *, void *, void *); +#pragma intrinsic(_InterlockedCompareExchangePointer) +FORCEINLINE void * PalInterlockedCompareExchangePointer(_Inout_ _Interlocked_operand_ void * volatile *pDst, _In_ void *pValue, _In_ void *pComparand) +{ + return _InterlockedCompareExchangePointer((void * volatile *)pDst, pValue, pComparand); +} + +#else // HOST_64BIT + +#define PalInterlockedExchangePointer(_pDst, _pValue) \ + ((void *)_InterlockedExchange((long volatile *)(_pDst), (long)(size_t)(_pValue))) + +#define PalInterlockedCompareExchangePointer(_pDst, _pValue, _pComparand) \ + ((void *)_InterlockedCompareExchange((long volatile *)(_pDst), (long)(size_t)(_pValue), (long)(size_t)(_pComparand))) + +#endif // HOST_64BIT + +EXTERN_C __declspec(dllimport) unsigned long __stdcall GetLastError(); +FORCEINLINE int PalGetLastError() +{ + return (int)GetLastError(); +} + +EXTERN_C __declspec(dllimport) void __stdcall SetLastError(unsigned long error); +FORCEINLINE void PalSetLastError(int error) +{ + SetLastError((unsigned long)error); +} + +#if defined(HOST_X86) + +EXTERN_C void _mm_pause(); +#pragma intrinsic(_mm_pause) +#define PalYieldProcessor() _mm_pause() + +FORCEINLINE void PalMemoryBarrier() +{ + long Barrier; + _InterlockedOr(&Barrier, 0); +} + +#elif defined(HOST_AMD64) + +EXTERN_C void _mm_pause(); +#pragma intrinsic(_mm_pause) +#define PalYieldProcessor() _mm_pause() + +EXTERN_C void __faststorefence(); +#pragma intrinsic(__faststorefence) +#define PalMemoryBarrier() __faststorefence() + + +#elif defined(HOST_ARM) + +EXTERN_C void __yield(void); +#pragma intrinsic(__yield) +EXTERN_C void __dmb(unsigned int _Type); +#pragma intrinsic(__dmb) +FORCEINLINE void PalYieldProcessor() +{ + __dmb(0xA /* _ARM_BARRIER_ISHST */); + __yield(); +} + +#define PalMemoryBarrier() __dmb(0xF /* _ARM_BARRIER_SY */) + +#elif defined(HOST_ARM64) + +EXTERN_C void __yield(void); +#pragma intrinsic(__yield) +EXTERN_C void __dmb(unsigned int _Type); +#pragma intrinsic(__dmb) +FORCEINLINE void PalYieldProcessor() +{ + __dmb(0xA /* _ARM64_BARRIER_ISHST */); + __yield(); +} + +#define PalMemoryBarrier() __dmb(0xF /* _ARM64_BARRIER_SY */) + +#else +#error Unsupported architecture +#endif + +#define PalDebugBreak() __debugbreak() diff --git a/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp new file mode 100644 index 0000000000000..e9fd0e8d72fc6 --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -0,0 +1,496 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
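// Editorial sketch (not part of this change): the inline primitives declared in
// PalRedhawkInline.h above compose into the usual spin-wait patterns. A minimal
// test-and-test-and-set lock, assuming only PalInterlockedCompareExchange,
// PalInterlockedExchange and PalYieldProcessor from that header (SketchSpinLock is a
// hypothetical name used only for illustration), would look roughly like this:
//
// struct SketchSpinLock
// {
//     Int32 volatile m_state; // 0 = free, 1 = held
//
//     void Enter()
//     {
//         for (;;)
//         {
//             // Plain read first so contended waiters do not keep bouncing the cache
//             // line with interlocked operations.
//             if (m_state == 0 && PalInterlockedCompareExchange(&m_state, 1, 0) == 0)
//                 return;
//             PalYieldProcessor();
//         }
//     }
//
//     void Leave()
//     {
//         // The interlocked exchange provides the full fence that publishes writes
//         // made while the lock was held.
//         PalInterlockedExchange(&m_state, 0);
//     }
// };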
+ +// +// Implementation of the Redhawk Platform Abstraction Layer (PAL) library when MinWin is the platform. In this +// case most or all of the import requirements which Redhawk has can be satisfied via a forwarding export to +// some native MinWin library. Therefore most of the work is done in the .def file and there is very little +// code here. +// +// Note that in general we don't want to assume that Windows and Redhawk global definitions can co-exist. +// Since this code must include Windows headers to do its job we can't therefore safely include general +// Redhawk header files. +// +#include "common.h" +#include +#include +#include +#include + +#include "holder.h" + +#define PalRaiseFailFastException RaiseFailFastException + +uint32_t PalEventWrite(REGHANDLE arg1, const EVENT_DESCRIPTOR * arg2, uint32_t arg3, EVENT_DATA_DESCRIPTOR * arg4) +{ + return EventWrite(arg1, arg2, arg3, arg4); +} + +#include "gcenv.h" + + +#define REDHAWK_PALEXPORT extern "C" +#define REDHAWK_PALAPI __stdcall + +// Index for the fiber local storage of the attached thread pointer +static UInt32 g_flsIndex = FLS_OUT_OF_INDEXES; + +static DWORD g_dwPALCapabilities; + +GCSystemInfo g_RhSystemInfo; + +bool InitializeSystemInfo() +{ + SYSTEM_INFO systemInfo; + GetSystemInfo(&systemInfo); + + g_RhSystemInfo.dwNumberOfProcessors = systemInfo.dwNumberOfProcessors; + g_RhSystemInfo.dwPageSize = systemInfo.dwPageSize; + g_RhSystemInfo.dwAllocationGranularity = systemInfo.dwAllocationGranularity; + + return true; +} + +// This is called when each *fiber* is destroyed. When the home fiber of a thread is destroyed, +// it means that the thread itself is destroyed. +// Since we receive that notification outside of the Loader Lock, it allows us to safely acquire +// the ThreadStore lock in the RuntimeThreadShutdown. +void __stdcall FiberDetachCallback(void* lpFlsData) +{ + ASSERT(g_flsIndex != FLS_OUT_OF_INDEXES); + ASSERT(lpFlsData == FlsGetValue(g_flsIndex)); + + if (lpFlsData != NULL) + { + // The current fiber is the home fiber of a thread, so the thread is shutting down + RuntimeThreadShutdown(lpFlsData); + } +} + +// The Redhawk PAL must be initialized before any of its exports can be called. Returns true for a successful +// initialization and false on failure. +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalInit() +{ + g_dwPALCapabilities = WriteWatchCapability | LowMemoryNotificationCapability; + + // We use fiber detach callbacks to run our thread shutdown code because the fiber detach + // callback is made without the OS loader lock + g_flsIndex = FlsAlloc(FiberDetachCallback); + if (g_flsIndex == FLS_OUT_OF_INDEXES) + { + return false; + } + + return true; +} + +// Given a mask of capabilities return true if all of them are supported by the current PAL. +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalHasCapability(PalCapability capability) +{ + return (g_dwPALCapabilities & (DWORD)capability) == (DWORD)capability; +} + +// Attach thread to PAL. +// It can be called multiple times for the same thread. +// It fails fast if a different thread was already registered with the current fiber +// or if the thread was already registered with a different fiber. 
+// Parameters: +// thread - thread to attach +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalAttachThread(void* thread) +{ + void* threadFromCurrentFiber = FlsGetValue(g_flsIndex); + + if (threadFromCurrentFiber != NULL) + { + ASSERT_UNCONDITIONALLY("Multiple threads encountered from a single fiber"); + RhFailFast(); + } + + // Associate the current fiber with the current thread. This makes the current fiber the thread's "home" + // fiber. This fiber is the only fiber allowed to execute managed code on this thread. When this fiber + // is destroyed, we consider the thread to be destroyed. + FlsSetValue(g_flsIndex, thread); +} + +// Detach thread from PAL. +// It fails fast if some other thread value was attached to PAL. +// Parameters: +// thread - thread to detach +// Return: +// true if the thread was detached, false if there was no attached thread +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalDetachThread(void* thread) +{ + ASSERT(g_flsIndex != FLS_OUT_OF_INDEXES); + void* threadFromCurrentFiber = FlsGetValue(g_flsIndex); + + if (threadFromCurrentFiber == NULL) + { + // we've seen this thread, but not this fiber. It must be a "foreign" fiber that was + // borrowing this thread. + return false; + } + + if (threadFromCurrentFiber != thread) + { + ASSERT_UNCONDITIONALLY("Detaching a thread from the wrong fiber"); + RhFailFast(); + } + + FlsSetValue(g_flsIndex, NULL); + return true; +} + +extern "C" UInt64 PalGetCurrentThreadIdForLogging() +{ + return GetCurrentThreadId(); +} + +#if !defined(USE_PORTABLE_HELPERS) && !defined(FEATURE_RX_THUNKS) +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, UInt32 templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut) +{ +#ifdef XBOX_ONE + return E_NOTIMPL; +#else + BOOL success = FALSE; + HANDLE hMap = NULL, hFile = INVALID_HANDLE_VALUE; + + const WCHAR * wszModuleFileName = NULL; + if (PalGetModuleFileName(&wszModuleFileName, hTemplateModule) == 0 || wszModuleFileName == NULL) + return FALSE; + + hFile = CreateFileW(wszModuleFileName, GENERIC_READ | GENERIC_EXECUTE, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) + goto cleanup; + + hMap = CreateFileMapping(hFile, NULL, SEC_IMAGE | PAGE_READONLY, 0, 0, NULL); + if (hMap == NULL) + goto cleanup; + + *newThunksOut = MapViewOfFile(hMap, 0, 0, templateRva, templateSize); + success = ((*newThunksOut) != NULL); + +cleanup: + CloseHandle(hMap); + CloseHandle(hFile); + + return success; +#endif +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalFreeThunksFromTemplate(_In_ void *pBaseAddress) +{ +#ifdef XBOX_ONE + return TRUE; +#else + return UnmapViewOfFile(pBaseAddress); +#endif +} +#endif // !USE_PORTABLE_HELPERS && !FEATURE_RX_THUNKS + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalMarkThunksAsValidCallTargets( + void *virtualAddress, + int thunkSize, + int thunksPerBlock, + int thunkBlockSize, + int thunkBlocksPerMapping) +{ + // For CoreRT we are using RWX pages so there is no need for this API for now. 
+ // Once we have a scenario for non-RWX pages we should be able to put the implementation here + return TRUE; +} + +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalCompatibleWaitAny(UInt32_BOOL alertable, UInt32 timeout, UInt32 handleCount, HANDLE* pHandles, UInt32_BOOL allowReentrantWait) +{ + if (!allowReentrantWait) + { + return WaitForMultipleObjectsEx(handleCount, pHandles, FALSE, timeout, alertable); + } + else + { + DWORD index; + SetLastError(ERROR_SUCCESS); // recommended by MSDN. + HRESULT hr = CoWaitForMultipleHandles(alertable ? COWAIT_ALERTABLE : 0, timeout, handleCount, pHandles, &index); + + switch (hr) + { + case S_OK: + return index; + + case RPC_S_CALLPENDING: + return WAIT_TIMEOUT; + + default: + SetLastError(HRESULT_CODE(hr)); + return WAIT_FAILED; + } + } +} + +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalSleep(UInt32 milliseconds) +{ + return Sleep(milliseconds); +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalSwitchToThread() +{ + return SwitchToThread(); +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName) +{ + return CreateEventW(pEventAttributes, manualReset, initialState, pName); +} + +REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetThreadContext(HANDLE hThread, _Out_ PAL_LIMITED_CONTEXT * pCtx) +{ + CONTEXT win32ctx; + + win32ctx.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_EXCEPTION_REQUEST; + + if (!GetThreadContext(hThread, &win32ctx)) + return false; + + // The CONTEXT_SERVICE_ACTIVE and CONTEXT_EXCEPTION_ACTIVE output flags indicate we suspended the thread + // at a point where the kernel cannot guarantee a completely accurate context. We'll fail the request in + // this case (which should force our caller to resume the thread and try again -- since this is a fairly + // narrow window we're highly likely to succeed next time). + // Note: in some cases (x86 WOW64, ARM32 on ARM64) the OS will not set the CONTEXT_EXCEPTION_REPORTING flag + // if the thread is executing in kernel mode (i.e. in the middle of a syscall or exception handling). + // Therefore, we should treat the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that + // it is not safe to manipulate with the current state of the thread context. 
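    // Accordingly, the check below only trusts the captured context when
    // CONTEXT_EXCEPTION_REPORTING came back set and neither CONTEXT_SERVICE_ACTIVE nor
    // CONTEXT_EXCEPTION_ACTIVE is set.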
+ if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) == 0 || + (win32ctx.ContextFlags & (CONTEXT_SERVICE_ACTIVE | CONTEXT_EXCEPTION_ACTIVE))) + return false; + +#ifdef HOST_X86 + pCtx->IP = win32ctx.Eip; + pCtx->Rsp = win32ctx.Esp; + pCtx->Rbp = win32ctx.Ebp; + pCtx->Rdi = win32ctx.Edi; + pCtx->Rsi = win32ctx.Esi; + pCtx->Rax = win32ctx.Eax; + pCtx->Rbx = win32ctx.Ebx; +#elif defined(HOST_AMD64) + pCtx->IP = win32ctx.Rip; + pCtx->Rsp = win32ctx.Rsp; + pCtx->Rbp = win32ctx.Rbp; + pCtx->Rdi = win32ctx.Rdi; + pCtx->Rsi = win32ctx.Rsi; + pCtx->Rax = win32ctx.Rax; + pCtx->Rbx = win32ctx.Rbx; + pCtx->R12 = win32ctx.R12; + pCtx->R13 = win32ctx.R13; + pCtx->R14 = win32ctx.R14; + pCtx->R15 = win32ctx.R15; +#elif defined(HOST_ARM) + pCtx->IP = win32ctx.Pc; + pCtx->R0 = win32ctx.R0; + pCtx->R4 = win32ctx.R4; + pCtx->R5 = win32ctx.R5; + pCtx->R6 = win32ctx.R6; + pCtx->R7 = win32ctx.R7; + pCtx->R8 = win32ctx.R8; + pCtx->R9 = win32ctx.R9; + pCtx->R10 = win32ctx.R10; + pCtx->R11 = win32ctx.R11; + pCtx->SP = win32ctx.Sp; + pCtx->LR = win32ctx.Lr; +#elif defined(HOST_ARM64) + pCtx->IP = win32ctx.Pc; + pCtx->X0 = win32ctx.X0; + pCtx->X1 = win32ctx.X1; + // TODO: Copy X2-X7 when we start supporting HVA's + pCtx->X19 = win32ctx.X19; + pCtx->X20 = win32ctx.X20; + pCtx->X21 = win32ctx.X21; + pCtx->X22 = win32ctx.X22; + pCtx->X23 = win32ctx.X23; + pCtx->X24 = win32ctx.X24; + pCtx->X25 = win32ctx.X25; + pCtx->X26 = win32ctx.X26; + pCtx->X27 = win32ctx.X27; + pCtx->X28 = win32ctx.X28; + pCtx->SP = win32ctx.Sp; + pCtx->LR = win32ctx.Lr; + pCtx->FP = win32ctx.Fp; +#else +#error Unsupported platform +#endif + return true; +} + + +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_ PalHijackCallback callback, _In_opt_ void* pCallbackContext) +{ + if (hThread == INVALID_HANDLE_VALUE) + { + return (UInt32)E_INVALIDARG; + } + + if (SuspendThread(hThread) == (DWORD)-1) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + PAL_LIMITED_CONTEXT ctx; + HRESULT result; + if (!PalGetThreadContext(hThread, &ctx)) + { + result = HRESULT_FROM_WIN32(GetLastError()); + } + else + { + result = callback(hThread, &ctx, pCallbackContext) ? S_OK : E_FAIL; + } + + ResumeThread(hThread); + + return result; +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalStartBackgroundWork(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext, BOOL highPriority) +{ + HANDLE hThread = CreateThread( + NULL, + 0, + (LPTHREAD_START_ROUTINE)callback, + pCallbackContext, + highPriority ? 
CREATE_SUSPENDED : 0, + NULL); + + if (hThread == NULL) + return NULL; + + if (highPriority) + { + SetThreadPriority(hThread, THREAD_PRIORITY_HIGHEST); + ResumeThread(hThread); + } + + return hThread; +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ + return PalStartBackgroundWork(callback, pCallbackContext, FALSE) != NULL; +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext) +{ + return PalStartBackgroundWork(callback, pCallbackContext, TRUE) != NULL; +} + +REDHAWK_PALEXPORT UInt32 REDHAWK_PALAPI PalGetTickCount() +{ +#pragma warning(push) +#pragma warning(disable: 28159) // Consider GetTickCount64 instead + return GetTickCount(); +#pragma warning(pop) +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalEventEnabled(REGHANDLE regHandle, _In_ const EVENT_DESCRIPTOR* eventDescriptor) +{ + return !!EventEnabled(regHandle, eventDescriptor); +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateFileW( + _In_z_ LPCWSTR pFileName, + uint32_t desiredAccess, + uint32_t shareMode, + _In_opt_ void* pSecurityAttributes, + uint32_t creationDisposition, + uint32_t flagsAndAttributes, + HANDLE hTemplateFile) +{ + return CreateFileW(pFileName, desiredAccess, shareMode, (LPSECURITY_ATTRIBUTES)pSecurityAttributes, + creationDisposition, flagsAndAttributes, hTemplateFile); +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateLowMemoryNotification() +{ + return CreateMemoryResourceNotification(LowMemoryResourceNotification); +} + +REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer) +{ + // CoreRT is not designed to be unloadable today. Use GET_MODULE_HANDLE_EX_FLAG_PIN to prevent + // the module from ever unloading. + + HMODULE module; + if (!GetModuleHandleExW( + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_PIN, + (LPCWSTR)pointer, + &module)) + { + return NULL; + } + + return (HANDLE)module; +} + +REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvxEnabled() +{ + typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); + PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; + + HMODULE hMod = LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); + if (hMod == NULL) + return FALSE; + + pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); + + if (pfnGetEnabledXStateFeatures == NULL) + { + return FALSE; + } + + DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); + if ((FeatureMask & XSTATE_MASK_AVX) == 0) + { + return FALSE; + } + + return TRUE; +} + +REDHAWK_PALEXPORT void* REDHAWK_PALAPI PalAddVectoredExceptionHandler(UInt32 firstHandler, _In_ PVECTORED_EXCEPTION_HANDLER vectoredHandler) +{ + return AddVectoredExceptionHandler(firstHandler, vectoredHandler); +} + +REDHAWK_PALEXPORT void PalPrintFatalError(const char* message) +{ + // Write the message using lowest-level OS API available. This is used to print the stack overflow + // message, so there is not much that can be done here. 
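    // The return value of WriteFile is intentionally ignored; if even this write fails
    // there is nothing further we can do.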
+ DWORD dwBytesWritten; + WriteFile(GetStdHandle(STD_ERROR_HANDLE), message, (DWORD)strlen(message), &dwBytesWritten, NULL); +} + +REDHAWK_PALEXPORT _Ret_maybenull_ _Post_writable_byte_size_(size) void* REDHAWK_PALAPI PalVirtualAlloc(_In_opt_ void* pAddress, UIntNative size, UInt32 allocationType, UInt32 protect) +{ + return VirtualAlloc(pAddress, size, allocationType, protect); +} + +#pragma warning (push) +#pragma warning (disable:28160) // warnings about invalid potential parameter combinations that would cause VirtualFree to fail - those are asserted for below +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualFree(_In_ void* pAddress, UIntNative size, UInt32 freeType) +{ + assert(((freeType & MEM_RELEASE) != MEM_RELEASE) || size == 0); + assert((freeType & (MEM_RELEASE | MEM_DECOMMIT)) != (MEM_RELEASE | MEM_DECOMMIT)); + assert(freeType != 0); + + return VirtualFree(pAddress, size, freeType); +} +#pragma warning (pop) + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddress, UIntNative size, UInt32 protect) +{ + DWORD oldProtect; + return VirtualProtect(pAddress, size, protect, &oldProtect); +} + +REDHAWK_PALEXPORT _Ret_maybenull_ void* REDHAWK_PALAPI PalSetWerDataBuffer(_In_ void* pNewBuffer) +{ + static void* pBuffer; + return InterlockedExchangePointer(&pBuffer, pNewBuffer); +} diff --git a/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.cpp b/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.cpp new file mode 100644 index 0000000000000..83fd70bd49dcb --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.cpp @@ -0,0 +1,120 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "gcenv.h" +#include "gcheaputilities.h" +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "DebugMacrosExt.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "volatile.h" +#include "yieldprocessornormalized.h" + +#define ULONGLONG int64_t + +static Volatile s_isYieldProcessorNormalizedInitialized = false; +static CrstStatic s_initializeYieldProcessorNormalizedCrst; + +// Defaults are for when InitializeYieldProcessorNormalized has not yet been called or when no measurement is done, and are +// tuned for Skylake processors +unsigned int g_yieldsPerNormalizedYield = 1; // current value is for Skylake processors, this is expected to be ~8 for pre-Skylake +unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration = 7; + +void InitializeYieldProcessorNormalizedCrst() +{ + WRAPPER_NO_CONTRACT; + s_initializeYieldProcessorNormalizedCrst.Init(CrstYieldProcessorNormalized); +} + +static void InitializeYieldProcessorNormalized() +{ + WRAPPER_NO_CONTRACT; + + CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst); + + if (s_isYieldProcessorNormalizedInitialized) + { + return; + } + + // Intel pre-Skylake processor: measured typically 14-17 cycles per yield + // Intel post-Skylake processor: measured typically 125-150 cycles per yield + const int MeasureDurationMs = 10; + const int NsPerSecond = 1000 * 1000 * 1000; + + LARGE_INTEGER li; + if (!PalQueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs) + { + // High precision clock not available or clock resolution is too low, resort to defaults + s_isYieldProcessorNormalizedInitialized = true; + return; + } + ULONGLONG ticksPerSecond = li.QuadPart; + 
+ // Measure the nanosecond delay per yield + ULONGLONG measureDurationTicks = ticksPerSecond / (1000 / MeasureDurationMs); + unsigned int yieldCount = 0; + PalQueryPerformanceCounter(&li); + ULONGLONG startTicks = li.QuadPart; + ULONGLONG elapsedTicks; + do + { + // On some systems, querying the high performance counter has relatively significant overhead. Do enough yields to mask + // the timing overhead. Assuming one yield has a delay of MinNsPerNormalizedYield, 1000 yields would have a delay in the + // low microsecond range. + for (int i = 0; i < 1000; ++i) + { + System_YieldProcessor(); + } + yieldCount += 1000; + + PalQueryPerformanceCounter(&li); + ULONGLONG nowTicks = li.QuadPart; + elapsedTicks = nowTicks - startTicks; + } while (elapsedTicks < measureDurationTicks); + double nsPerYield = (double)elapsedTicks * NsPerSecond / ((double)yieldCount * ticksPerSecond); + if (nsPerYield < 1) + { + nsPerYield = 1; + } + + // Calculate the number of yields required to span the duration of a normalized yield. Since nsPerYield is at least 1, this + // value is naturally limited to MinNsPerNormalizedYield. + int yieldsPerNormalizedYield = (int)(MinNsPerNormalizedYield / nsPerYield + 0.5); + if (yieldsPerNormalizedYield < 1) + { + yieldsPerNormalizedYield = 1; + } + _ASSERTE(yieldsPerNormalizedYield <= (int)MinNsPerNormalizedYield); + + // Calculate the maximum number of yields that would be optimal for a late spin iteration. Typically, we would not want to + // spend excessive amounts of time (thousands of cycles) doing only YieldProcessor, as SwitchToThread/Sleep would do a + // better job of allowing other work to run. + int optimalMaxNormalizedYieldsPerSpinIteration = + (int)(NsPerOptimalMaxSpinIterationDuration / (yieldsPerNormalizedYield * nsPerYield) + 0.5); + if (optimalMaxNormalizedYieldsPerSpinIteration < 1) + { + optimalMaxNormalizedYieldsPerSpinIteration = 1; + } + + g_yieldsPerNormalizedYield = yieldsPerNormalizedYield; + g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration; + s_isYieldProcessorNormalizedInitialized = true; + + GCHeapUtilities::GetGCHeap()->SetYieldProcessorScalingFactor((float)yieldsPerNormalizedYield); +} + +void EnsureYieldProcessorNormalizedInitialized() +{ + WRAPPER_NO_CONTRACT; + + if (!s_isYieldProcessorNormalizedInitialized) + { + InitializeYieldProcessorNormalized(); + } +} diff --git a/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.h b/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.h new file mode 100644 index 0000000000000..405e991de07ad --- /dev/null +++ b/src/coreclr/src/nativeaot/Runtime/yieldprocessornormalized.h @@ -0,0 +1,229 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#pragma once + +#include + +// Undefine YieldProcessor to encourage using the normalized versions below instead. System_YieldProcessor() can be used where +// the intention is to use the system-default implementation of YieldProcessor(). 
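// Editorial sketch (not part of this change): the measurement done in
// yieldprocessornormalized.cpp effectively rounds MinNsPerNormalizedYield by the
// measured cost of one yield. SketchYieldsPerNormalizedYield is a hypothetical helper,
// and the clock frequencies below are assumptions used only to show why the defaults
// land at 1 (post-Skylake) and ~8 (pre-Skylake):
//
// unsigned int SketchYieldsPerNormalizedYield(double nsPerYield)
// {
//     if (nsPerYield < 1)
//         nsPerYield = 1;
//     unsigned int yields = (unsigned int)(37.0 /* MinNsPerNormalizedYield */ / nsPerYield + 0.5);
//     return yields < 1 ? 1 : yields;
// }
//
// Post-Skylake: ~125-150 cycles per pause at ~3.5-4 GHz is roughly 31-43 ns  => 1 yield per normalized yield.
// Pre-Skylake:  ~14-17 cycles per pause at ~3.5 GHz is roughly 4-5 ns        => ~8 yields per normalized yield.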
+#define HAS_SYSTEM_YIELDPROCESSOR +FORCEINLINE void System_YieldProcessor() { PalYieldProcessor(); } +#ifdef YieldProcessor +#undef YieldProcessor +#endif +#define YieldProcessor Dont_Use_YieldProcessor +#ifdef PalYieldProcessor +#undef PalYieldProcessor +#endif +#define PalYieldProcessor Dont_Use_PalYieldProcessor + +#define SIZE_T UIntNative + +const unsigned int MinNsPerNormalizedYield = 37; // measured typically 37-46 on post-Skylake +const unsigned int NsPerOptimalMaxSpinIterationDuration = 272; // approx. 900 cycles, measured 281 on pre-Skylake, 263 on post-Skylake + +extern unsigned int g_yieldsPerNormalizedYield; +extern unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration; + +void InitializeYieldProcessorNormalizedCrst(); +void EnsureYieldProcessorNormalizedInitialized(); + +class YieldProcessorNormalizationInfo +{ +private: + unsigned int yieldsPerNormalizedYield; + unsigned int optimalMaxNormalizedYieldsPerSpinIteration; + unsigned int optimalMaxYieldsPerSpinIteration; + +public: + YieldProcessorNormalizationInfo() + : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield), + optimalMaxNormalizedYieldsPerSpinIteration(g_optimalMaxNormalizedYieldsPerSpinIteration), + optimalMaxYieldsPerSpinIteration(yieldsPerNormalizedYield * optimalMaxNormalizedYieldsPerSpinIteration) + { + } + + friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &); + friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &, unsigned int); + friend void YieldProcessorNormalizedForPreSkylakeCount(const YieldProcessorNormalizationInfo &, unsigned int); + friend void YieldProcessorWithBackOffNormalized(const YieldProcessorNormalizationInfo &, unsigned int); +}; + +// See YieldProcessorNormalized() for preliminary info. Typical usage: +// if (!condition) +// { +// YieldProcessorNormalizationInfo normalizationInfo; +// do +// { +// YieldProcessorNormalized(normalizationInfo); +// } while (!condition); +// } +FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo) +{ + unsigned int n = normalizationInfo.yieldsPerNormalizedYield; + _ASSERTE(n != 0); + do + { + System_YieldProcessor(); + } while (--n != 0); +} + +// Delays execution of the current thread for a short duration. Unlike YieldProcessor(), an effort is made to normalize the +// delay across processors. The actual delay may be meaningful in several ways, including but not limited to the following: +// - The delay should be long enough that a tiny spin-wait like the following has a decent likelihood of observing a new value +// for the condition (when changed by a different thread) on each iteration, otherwise it may unnecessary increase CPU usage +// and decrease scalability of the operation. +// while(!condition) +// { +// YieldProcessorNormalized(); +// } +// - The delay should be short enough that a tiny spin-wait like above would not miss multiple cross-thread changes to the +// condition, otherwise it may unnecessarily increase latency of the operation +// - In reasonably short spin-waits, the actual delay may not matter much. In unreasonably long spin-waits that progress in +// yield count per iteration for each failed check of the condition, the progression can significantly magnify the second +// issue above on later iterations. +// - This function and variants are intended to provide a decent balance between the above issues, as ideal solutions to each +// issue have trade-offs between them. 
If latency of the operation is far more important in the scenario, consider using +// System_YieldProcessor() instead, which would issue a delay that is typically <= the delay issued by this method. +FORCEINLINE void YieldProcessorNormalized() +{ + YieldProcessorNormalized(YieldProcessorNormalizationInfo()); +} + +// See YieldProcessorNormalized(count) for preliminary info. Typical usage: +// if (!moreExpensiveCondition) +// { +// YieldProcessorNormalizationInfo normalizationInfo; +// do +// { +// YieldProcessorNormalized(normalizationInfo, 2); +// } while (!moreExpensiveCondition); +// } +FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo, unsigned int count) +{ + _ASSERTE(count != 0); + + if (sizeof(SIZE_T) <= sizeof(unsigned int)) + { + // On platforms with a small SIZE_T, prevent overflow on the multiply below. normalizationInfo.yieldsPerNormalizedYield + // is limited to MinNsPerNormalizedYield by InitializeYieldProcessorNormalized(). + const unsigned int MaxCount = UINT_MAX / MinNsPerNormalizedYield; + if (count > MaxCount) + { + count = MaxCount; + } + } + + SIZE_T n = (SIZE_T)count * normalizationInfo.yieldsPerNormalizedYield; + _ASSERTE(n != 0); + do + { + System_YieldProcessor(); + } while (--n != 0); +} + +// See YieldProcessorNormalized() for preliminary info. This function repeats the delay 'count' times. This overload is +// preferred over the single-count overload when multiple yields are desired per spin-wait iteration. Typical usage: +// while(!moreExpensiveCondition) +// { +// YieldProcessorNormalized(2); +// } +FORCEINLINE void YieldProcessorNormalized(unsigned int count) +{ + YieldProcessorNormalized(YieldProcessorNormalizationInfo(), count); +} + +// Please DO NOT use this function in new code! See YieldProcessorNormalizedForPreSkylakeCount(preSkylakeCount) for preliminary +// info. Typical usage: +// if (!condition) +// { +// YieldProcessorNormalizationInfo normalizationInfo; +// do +// { +// YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, 100); +// } while (!condition); +// } +FORCEINLINE void YieldProcessorNormalizedForPreSkylakeCount( + const YieldProcessorNormalizationInfo &normalizationInfo, + unsigned int preSkylakeCount) +{ + _ASSERTE(preSkylakeCount != 0); + + if (sizeof(SIZE_T) <= sizeof(unsigned int)) + { + // On platforms with a small SIZE_T, prevent overflow on the multiply below. normalizationInfo.yieldsPerNormalizedYield + // is limited to MinNsPerNormalizedYield by InitializeYieldProcessorNormalized(). + const unsigned int MaxCount = UINT_MAX / MinNsPerNormalizedYield; + if (preSkylakeCount > MaxCount) + { + preSkylakeCount = MaxCount; + } + } + + const unsigned int PreSkylakeCountToSkylakeCountDivisor = 8; + SIZE_T n = (SIZE_T)preSkylakeCount * normalizationInfo.yieldsPerNormalizedYield / PreSkylakeCountToSkylakeCountDivisor; + if (n == 0) + { + n = 1; + } + do + { + System_YieldProcessor(); + } while (--n != 0); +} + +// Please DO NOT use this function in new code! This function is to be used for old spin-wait loops that have not been retuned +// for recent processors, and especially where the yield count may be unreasonably high. The function scales the yield count in +// an attempt to normalize the total delay across processors, to approximately the total delay that would be issued on a +// pre-Skylake processor. New code should be tuned with YieldProcessorNormalized() or variants instead. 
Typical usage: +// while(!condition) +// { +// YieldProcessorNormalizedForPreSkylakeCount(100); +// } +FORCEINLINE void YieldProcessorNormalizedForPreSkylakeCount(unsigned int preSkylakeCount) +{ + YieldProcessorNormalizedForPreSkylakeCount(YieldProcessorNormalizationInfo(), preSkylakeCount); +} + +// See YieldProcessorNormalized() for preliminary info. This function is to be used when there is a decent possibility that the +// condition would not be satisfied within a short duration. The current implementation increases the delay per spin-wait +// iteration exponentially up to a limit. Typical usage: +// if (!conditionThatMayNotBeSatisfiedSoon) +// { +// YieldProcessorNormalizationInfo normalizationInfo; +// do +// { +// YieldProcessorWithBackOffNormalized(normalizationInfo); // maybe Sleep(0) occasionally +// } while (!conditionThatMayNotBeSatisfiedSoon); +// } +FORCEINLINE void YieldProcessorWithBackOffNormalized( + const YieldProcessorNormalizationInfo &normalizationInfo, + unsigned int spinIteration) +{ + // normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration cannot exceed the value below based on calculations done in + // InitializeYieldProcessorNormalized() + const unsigned int MaxOptimalMaxNormalizedYieldsPerSpinIteration = + NsPerOptimalMaxSpinIterationDuration * 3 / (MinNsPerNormalizedYield * 2) + 1; + _ASSERTE(normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration <= MaxOptimalMaxNormalizedYieldsPerSpinIteration); + + // This shift value should be adjusted based on the asserted condition below + const uint8_t MaxShift = 3; + static_assert(((unsigned int)1 << (MaxShift + 1)) >= MaxOptimalMaxNormalizedYieldsPerSpinIteration, ""); + + unsigned int n; + if (spinIteration <= MaxShift && + ((unsigned int)1 << spinIteration) < normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration) + { + n = ((unsigned int)1 << spinIteration) * normalizationInfo.yieldsPerNormalizedYield; + } + else + { + n = normalizationInfo.optimalMaxYieldsPerSpinIteration; + } + _ASSERTE(n != 0); + do + { + System_YieldProcessor(); + } while (--n != 0); +} diff --git a/src/coreclr/src/nativeaot/libunwind/.arcconfig b/src/coreclr/src/nativeaot/libunwind/.arcconfig new file mode 100644 index 0000000000000..78ee8d358cded --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/.arcconfig @@ -0,0 +1,4 @@ +{ + "repository.callsign" : "UNW", + "conduit_uri" : "https://reviews.llvm.org/" +} diff --git a/src/coreclr/src/nativeaot/libunwind/.clang-format b/src/coreclr/src/nativeaot/libunwind/.clang-format new file mode 100644 index 0000000000000..5bead5f39dd3c --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: LLVM + diff --git a/src/coreclr/src/nativeaot/libunwind/CMakeLists.txt b/src/coreclr/src/nativeaot/libunwind/CMakeLists.txt new file mode 100644 index 0000000000000..b51922a48fe28 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/CMakeLists.txt @@ -0,0 +1,383 @@ +#=============================================================================== +# Setup Project +#=============================================================================== + +cmake_minimum_required(VERSION 3.4.3) + +if (POLICY CMP0042) + cmake_policy(SET CMP0042 NEW) # Set MACOSX_RPATH=YES by default +endif() + +# Add path for custom modules +set(CMAKE_MODULE_PATH + "${CMAKE_CURRENT_SOURCE_DIR}/cmake" + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules" + ${CMAKE_MODULE_PATH} + ) + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_BUILD) + 
project(libunwind) + + # Rely on llvm-config. + set(CONFIG_OUTPUT) + if(NOT LLVM_CONFIG_PATH) + find_program(LLVM_CONFIG_PATH "llvm-config") + endif() + if (DEFINED LLVM_PATH) + set(LLVM_INCLUDE_DIR ${LLVM_INCLUDE_DIR} CACHE PATH "Path to llvm/include") + set(LLVM_PATH ${LLVM_PATH} CACHE PATH "Path to LLVM source tree") + set(LLVM_MAIN_SRC_DIR ${LLVM_PATH}) + set(LLVM_CMAKE_PATH "${LLVM_PATH}/cmake/modules") + elseif(LLVM_CONFIG_PATH) + message(STATUS "Found LLVM_CONFIG_PATH as ${LLVM_CONFIG_PATH}") + set(CONFIG_COMMAND ${LLVM_CONFIG_PATH} "--includedir" "--prefix" "--src-root") + execute_process(COMMAND ${CONFIG_COMMAND} + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE CONFIG_OUTPUT) + if (NOT HAD_ERROR) + string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" + CONFIG_OUTPUT ${CONFIG_OUTPUT}) + else() + string(REPLACE ";" " " CONFIG_COMMAND_STR "${CONFIG_COMMAND}") + message(STATUS "${CONFIG_COMMAND_STR}") + message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}") + endif() + + list(GET CONFIG_OUTPUT 0 INCLUDE_DIR) + list(GET CONFIG_OUTPUT 1 LLVM_OBJ_ROOT) + list(GET CONFIG_OUTPUT 2 MAIN_SRC_DIR) + + set(LLVM_INCLUDE_DIR ${INCLUDE_DIR} CACHE PATH "Path to llvm/include") + set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree") + set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree") + set(LLVM_LIT_PATH "${LLVM_PATH}/utils/lit/lit.py") + + # --cmakedir is supported since llvm r291218 (4.0 release) + execute_process( + COMMAND ${LLVM_CONFIG_PATH} --cmakedir + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE CONFIG_OUTPUT + ERROR_QUIET) + if(NOT HAD_ERROR) + string(STRIP "${CONFIG_OUTPUT}" LLVM_CMAKE_PATH_FROM_LLVM_CONFIG) + file(TO_CMAKE_PATH "${LLVM_CMAKE_PATH_FROM_LLVM_CONFIG}" LLVM_CMAKE_PATH) + else() + file(TO_CMAKE_PATH "${LLVM_BINARY_DIR}" LLVM_BINARY_DIR_CMAKE_STYLE) + set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm") + endif() + else() + message(WARNING "UNSUPPORTED LIBUNWIND CONFIGURATION DETECTED: " + "llvm-config not found and LLVM_MAIN_SRC_DIR not defined. " + "Reconfigure with -DLLVM_CONFIG=path/to/llvm-config " + "or -DLLVM_PATH=path/to/llvm-source-root.") + endif() + + if (EXISTS ${LLVM_CMAKE_PATH}) + # Enable warnings, otherwise -w gets added to the cflags by HandleLLVMOptions. + set(LLVM_ENABLE_WARNINGS ON) + list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}") + include("${LLVM_CMAKE_PATH}/AddLLVM.cmake") + include("${LLVM_CMAKE_PATH}/HandleLLVMOptions.cmake") + else() + message(WARNING "Not found: ${LLVM_CMAKE_PATH}") + endif() + + set(PACKAGE_NAME libunwind) + set(PACKAGE_VERSION 9.0.0svn) + set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") + set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") + + if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) + set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) + else() + # Seek installed Lit. + find_program(LLVM_LIT "lit.py" ${LLVM_MAIN_SRC_DIR}/utils/lit + DOC "Path to lit.py") + endif() + + if (LLVM_LIT) + # Define the default arguments to use with 'lit', and an option for the user + # to override. + set(LIT_ARGS_DEFAULT "-sv") + if (MSVC OR XCODE) + set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") + endif() + set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") + + # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools. 
+ if (WIN32 AND NOT CYGWIN) + set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools") + endif() + else() + set(LLVM_INCLUDE_TESTS OFF) + endif() + + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}) +else() + set(LLVM_LIT "${CMAKE_SOURCE_DIR}/utils/lit/lit.py") +endif() + +#=============================================================================== +# Setup CMake Options +#=============================================================================== +include(CMakeDependentOption) +include(HandleCompilerRT) + +# Define options. +option(LIBUNWIND_BUILD_32_BITS "Build 32 bit libunwind" ${LLVM_BUILD_32_BITS}) +option(LIBUNWIND_ENABLE_ASSERTIONS "Enable assertions independent of build mode." ON) +option(LIBUNWIND_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) +option(LIBUNWIND_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF) +option(LIBUNWIND_ENABLE_SHARED "Build libunwind as a shared library." ON) +option(LIBUNWIND_ENABLE_STATIC "Build libunwind as a static library." ON) +option(LIBUNWIND_ENABLE_CROSS_UNWINDING "Enable cross-platform unwinding support." OFF) +option(LIBUNWIND_ENABLE_ARM_WMMX "Enable unwinding support for ARM WMMX registers." OFF) +option(LIBUNWIND_ENABLE_THREADS "Build libunwind with threading support." ON) +option(LIBUNWIND_WEAK_PTHREAD_LIB "Use weak references to refer to pthread functions." OFF) +option(LIBUNWIND_USE_COMPILER_RT "Use compiler-rt instead of libgcc" OFF) +option(LIBUNWIND_INCLUDE_DOCS "Build the libunwind documentation." ${LLVM_INCLUDE_DOCS}) + +set(LIBUNWIND_LIBDIR_SUFFIX "${LLVM_LIBDIR_SUFFIX}" CACHE STRING + "Define suffix of library directory name (32/64)") +option(LIBUNWIND_INSTALL_LIBRARY "Install the libunwind library." ON) +cmake_dependent_option(LIBUNWIND_INSTALL_STATIC_LIBRARY + "Install the static libunwind library." ON + "LIBUNWIND_ENABLE_STATIC;LIBUNWIND_INSTALL_LIBRARY" OFF) +cmake_dependent_option(LIBUNWIND_INSTALL_SHARED_LIBRARY + "Install the shared libunwind library." ON + "LIBUNWIND_ENABLE_SHARED;LIBUNWIND_INSTALL_LIBRARY" OFF) +set(LIBUNWIND_TARGET_TRIPLE "" CACHE STRING "Target triple for cross compiling.") +set(LIBUNWIND_GCC_TOOLCHAIN "" CACHE PATH "GCC toolchain for cross compiling.") +set(LIBUNWIND_SYSROOT "" CACHE PATH "Sysroot for cross compiling.") +set(LIBUNWIND_TEST_LINKER_FLAGS "" CACHE STRING + "Additional linker flags for test programs.") +set(LIBUNWIND_TEST_COMPILER_FLAGS "" CACHE STRING + "Additional compiler flags for test programs.") + +if (NOT LIBUNWIND_ENABLE_SHARED AND NOT LIBUNWIND_ENABLE_STATIC) + message(FATAL_ERROR "libunwind must be built as either a shared or static library.") +endif() + +# Check that we can build with 32 bits if requested. +if (CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32) + if (LIBUNWIND_BUILD_32_BITS AND NOT LLVM_BUILD_32_BITS) # Don't duplicate the output from LLVM + message(STATUS "Building 32 bits executables and libraries.") + endif() +elseif(LIBUNWIND_BUILD_32_BITS) + message(FATAL_ERROR "LIBUNWIND_BUILD_32_BITS=ON is not supported on this platform.") +endif() + +option(LIBUNWIND_HERMETIC_STATIC_LIBRARY + "Do not export any symbols from the static library." 
OFF) + +#=============================================================================== +# Configure System +#=============================================================================== + +# Add path for custom modules +set(CMAKE_MODULE_PATH + "${CMAKE_CURRENT_SOURCE_DIR}/cmake" + ${CMAKE_MODULE_PATH}) + +set(LIBUNWIND_COMPILER ${CMAKE_CXX_COMPILER}) +set(LIBUNWIND_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(LIBUNWIND_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION + ${PACKAGE_VERSION}) + +if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) + set(LIBUNWIND_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) + set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) + if(LIBCXX_LIBDIR_SUBDIR) + string(APPEND LIBUNWIND_LIBRARY_DIR /${LIBUNWIND_LIBDIR_SUBDIR}) + string(APPEND LIBUNWIND_INSTALL_LIBRARY_DIR /${LIBUNWIND_LIBDIR_SUBDIR}) + endif() +elseif(LLVM_LIBRARY_OUTPUT_INTDIR) + set(LIBUNWIND_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) + set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LIBUNWIND_LIBDIR_SUFFIX}) +else() + set(LIBUNWIND_LIBRARY_DIR ${CMAKE_BINARY_DIR}/lib${LIBUNWIND_LIBDIR_SUFFIX}) + set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LIBUNWIND_LIBDIR_SUFFIX}) +endif() + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LIBUNWIND_LIBRARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LIBUNWIND_LIBRARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LIBUNWIND_LIBRARY_DIR}) + +set(LIBUNWIND_INSTALL_PREFIX "" CACHE STRING "Define libunwind destination prefix.") + +set(LIBUNWIND_C_FLAGS "") +set(LIBUNWIND_CXX_FLAGS "") +set(LIBUNWIND_COMPILE_FLAGS "") +set(LIBUNWIND_LINK_FLAGS "") + +# Get required flags. +macro(unwind_append_if list condition var) + if (${condition}) + list(APPEND ${list} ${var}) + endif() +endmacro() + +macro(add_target_flags) + foreach(value ${ARGN}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${value}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${value}") + list(APPEND LIBUNWIND_COMPILE_FLAGS ${value}) + list(APPEND LIBUNWIND_LINK_FLAGS ${value}) + endforeach() +endmacro() + +macro(add_target_flags_if condition) + if (${condition}) + add_target_flags(${ARGN}) + endif() +endmacro() + +add_target_flags_if(LIBUNWIND_BUILD_32_BITS "-m32") + +if(LIBUNWIND_TARGET_TRIPLE) + add_target_flags("--target=${LIBUNWIND_TARGET_TRIPLE}") +elseif(CMAKE_CXX_COMPILER_TARGET) + set(LIBUNWIND_TARGET_TRIPLE "${CMAKE_CXX_COMPILER_TARGET}") +endif() +if(LIBUNWIND_GCC_TOOLCHAIN) + add_target_flags("--gcc-toolchain=${LIBUNWIND_GCC_TOOLCHAIN}") +elseif(CMAKE_CXX_COMPILER_EXTERNAL_TOOLCHAIN) + set(LIBUNWIND_GCC_TOOLCHAIN "${CMAKE_CXX_COMPILER_EXTERNAL_TOOLCHAIN}") +endif() +if(LIBUNWIND_SYSROOT) + add_target_flags("--sysroot=${LIBUNWIND_SYSROOT}") +elseif(CMAKE_SYSROOT) + set(LIBUNWIND_SYSROOT "${CMAKE_SYSROOT}") +endif() + +if (LIBUNWIND_TARGET_TRIPLE) + set(TARGET_TRIPLE "${LIBUNWIND_TARGET_TRIPLE}") +endif() + +# Configure compiler. 
+include(config-ix) + +if (LIBUNWIND_USE_COMPILER_RT AND NOT LIBUNWIND_HAS_NODEFAULTLIBS_FLAG) + list(APPEND LIBUNWIND_LINK_FLAGS "-rtlib=compiler-rt") +endif() + +#=============================================================================== +# Setup Compiler Flags +#=============================================================================== + +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WERROR_FLAG -Werror=return-type) + +# Get warning flags +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_W_FLAG -W) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WALL_FLAG -Wall) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WCHAR_SUBSCRIPTS_FLAG -Wchar-subscripts) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WCONVERSION_FLAG -Wconversion) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WMISMATCHED_TAGS_FLAG -Wmismatched-tags) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WMISSING_BRACES_FLAG -Wmissing-braces) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WNEWLINE_EOF_FLAG -Wnewline-eof) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WNO_UNUSED_FUNCTION_FLAG -Wno-unused-function) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSHADOW_FLAG -Wshadow) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSHORTEN_64_TO_32_FLAG -Wshorten-64-to-32) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSIGN_COMPARE_FLAG -Wsign-compare) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSIGN_CONVERSION_FLAG -Wsign-conversion) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSTRICT_ALIASING_FLAG -Wstrict-aliasing=2) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WSTRICT_OVERFLOW_FLAG -Wstrict-overflow=4) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WUNUSED_PARAMETER_FLAG -Wunused-parameter) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WUNUSED_VARIABLE_FLAG -Wunused-variable) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WWRITE_STRINGS_FLAG -Wwrite-strings) +unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WUNDEF_FLAG -Wundef) + +if (LIBUNWIND_ENABLE_WERROR) + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WERROR_FLAG -Werror) + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WX_FLAG -WX) +else() + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_WNO_ERROR_FLAG -Wno-error) + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_NO_WX_FLAG -WX-) +endif() + +if (LIBUNWIND_ENABLE_PEDANTIC) + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_HAS_PEDANTIC_FLAG -pedantic) +endif() + +# Get feature flags. +# Exceptions +# Catches C++ exceptions only and tells the compiler to assume that extern C +# functions never throw a C++ exception. +unwind_append_if(LIBUNWIND_CXX_FLAGS LIBUNWIND_HAS_FSTRICT_ALIASING_FLAG -fstrict-aliasing) +unwind_append_if(LIBUNWIND_CXX_FLAGS LIBUNWIND_HAS_EHSC_FLAG -EHsc) + +unwind_append_if(LIBUNWIND_C_FLAGS LIBUNWIND_HAS_FUNWIND_TABLES -funwind-tables) + +# Ensure that we don't depend on C++ standard library. +unwind_append_if(LIBUNWIND_CXX_FLAGS LIBUNWIND_HAS_NOSTDINCXX_FLAG -nostdinc++) + +# Assert +string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) +if (LIBUNWIND_ENABLE_ASSERTIONS) + # MSVC doesn't like _DEBUG on release builds. See PR 4379. 
+ if (NOT MSVC) + list(APPEND LIBUNWIND_COMPILE_FLAGS -D_DEBUG) + endif() + + # On Release builds cmake automatically defines NDEBUG, so we + # explicitly undefine it: + if (uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE") + list(APPEND LIBUNWIND_COMPILE_FLAGS -UNDEBUG) + endif() +else() + if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE") + list(APPEND LIBUNWIND_COMPILE_FLAGS -DNDEBUG) + endif() +endif() + +# Cross-unwinding +if (NOT LIBUNWIND_ENABLE_CROSS_UNWINDING) + list(APPEND LIBUNWIND_COMPILE_FLAGS -D_LIBUNWIND_IS_NATIVE_ONLY) +endif() + +# Threading-support +if (NOT LIBUNWIND_ENABLE_THREADS) + list(APPEND LIBUNWIND_COMPILE_FLAGS -D_LIBUNWIND_HAS_NO_THREADS) +endif() + +# ARM WMMX register support +if (LIBUNWIND_ENABLE_ARM_WMMX) + # __ARM_WMMX is a compiler pre-define (as per the ACLE 2.0). Clang does not + # define this macro for any supported target at present. Therefore, here we + # provide the option to explicitly enable support for WMMX registers in the + # unwinder. + list(APPEND LIBUNWIND_COMPILE_FLAGS -D__ARM_WMMX) +endif() + +# This is the _ONLY_ place where add_definitions is called. +if (MSVC) + add_definitions(-D_CRT_SECURE_NO_WARNINGS) +endif() + +# Disable DLL annotations on Windows for static builds. +if (WIN32 AND LIBUNWIND_ENABLE_STATIC AND NOT LIBUNWIND_ENABLE_SHARED) + add_definitions(-D_LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS) +endif() + +if (LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) + add_definitions(-D_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +endif() + +#=============================================================================== +# Setup Source Code +#=============================================================================== + +include_directories(include) + +add_subdirectory(src) + +if (LIBUNWIND_INCLUDE_DOCS) + add_subdirectory(docs) +endif() + +if (EXISTS ${LLVM_CMAKE_PATH}) + add_subdirectory(test) +endif() diff --git a/src/coreclr/src/nativeaot/libunwind/cmake/Modules/HandleCompilerRT.cmake b/src/coreclr/src/nativeaot/libunwind/cmake/Modules/HandleCompilerRT.cmake new file mode 100644 index 0000000000000..77168e599466e --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/cmake/Modules/HandleCompilerRT.cmake @@ -0,0 +1,64 @@ +function(find_compiler_rt_library name dest) + if (NOT DEFINED LIBUNWIND_COMPILE_FLAGS) + message(FATAL_ERROR "LIBUNWIND_COMPILE_FLAGS must be defined when using this function") + endif() + set(dest "" PARENT_SCOPE) + set(CLANG_COMMAND ${CMAKE_CXX_COMPILER} ${LIBUNWIND_COMPILE_FLAGS} + "--rtlib=compiler-rt" "--print-libgcc-file-name") + if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_CXX_COMPILER_TARGET) + list(APPEND CLANG_COMMAND "--target=${CMAKE_CXX_COMPILER_TARGET}") + endif() + get_property(LIBUNWIND_CXX_FLAGS CACHE CMAKE_CXX_FLAGS PROPERTY VALUE) + string(REPLACE " " ";" LIBUNWIND_CXX_FLAGS "${LIBUNWIND_CXX_FLAGS}") + list(APPEND CLANG_COMMAND ${LIBUNWIND_CXX_FLAGS}) + execute_process( + COMMAND ${CLANG_COMMAND} + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE LIBRARY_FILE + ) + string(STRIP "${LIBRARY_FILE}" LIBRARY_FILE) + file(TO_CMAKE_PATH "${LIBRARY_FILE}" LIBRARY_FILE) + string(REPLACE "builtins" "${name}" LIBRARY_FILE "${LIBRARY_FILE}") + if (NOT HAD_ERROR AND EXISTS "${LIBRARY_FILE}") + message(STATUS "Found compiler-rt library: ${LIBRARY_FILE}") + set(${dest} "${LIBRARY_FILE}" PARENT_SCOPE) + else() + message(STATUS "Failed to find compiler-rt library") + endif() +endfunction() + +function(find_compiler_rt_dir dest) + if (NOT DEFINED LIBUNWIND_COMPILE_FLAGS) + message(FATAL_ERROR "LIBUNWIND_COMPILE_FLAGS must be defined 
when using this function") + endif() + set(dest "" PARENT_SCOPE) + if (APPLE) + set(CLANG_COMMAND ${CMAKE_CXX_COMPILER} ${LIBUNWIND_COMPILE_FLAGS} + "-print-file-name=lib") + execute_process( + COMMAND ${CLANG_COMMAND} + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE LIBRARY_DIR + ) + string(STRIP "${LIBRARY_DIR}" LIBRARY_DIR) + file(TO_CMAKE_PATH "${LIBRARY_DIR}" LIBRARY_DIR) + set(LIBRARY_DIR "${LIBRARY_DIR}/darwin") + else() + set(CLANG_COMMAND ${CMAKE_CXX_COMPILER} ${LIBUNWIND_COMPILE_FLAGS} + "--rtlib=compiler-rt" "--print-libgcc-file-name") + execute_process( + COMMAND ${CLANG_COMMAND} + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE LIBRARY_FILE + ) + string(STRIP "${LIBRARY_FILE}" LIBRARY_FILE) + file(TO_CMAKE_PATH "${LIBRARY_FILE}" LIBRARY_FILE) + get_filename_component(LIBRARY_DIR "${LIBRARY_FILE}" DIRECTORY) + endif() + if (NOT HAD_ERROR AND EXISTS "${LIBRARY_DIR}") + message(STATUS "Found compiler-rt directory: ${LIBRARY_DIR}") + set(${dest} "${LIBRARY_DIR}" PARENT_SCOPE) + else() + message(STATUS "Failed to find compiler-rt directory") + endif() +endfunction() diff --git a/src/coreclr/src/nativeaot/libunwind/cmake/config-ix.cmake b/src/coreclr/src/nativeaot/libunwind/cmake/config-ix.cmake new file mode 100644 index 0000000000000..07a95ce1a46a8 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/cmake/config-ix.cmake @@ -0,0 +1,110 @@ +include(CMakePushCheckState) +include(CheckCCompilerFlag) +include(CheckCXXCompilerFlag) +include(CheckLibraryExists) +include(CheckCSourceCompiles) + +check_library_exists(c fopen "" LIBUNWIND_HAS_C_LIB) + +if (NOT LIBUNWIND_USE_COMPILER_RT) + check_library_exists(gcc_s __gcc_personality_v0 "" LIBUNWIND_HAS_GCC_S_LIB) + check_library_exists(gcc __absvdi2 "" LIBUNWIND_HAS_GCC_LIB) +endif() + +# libunwind is built with -nodefaultlibs, so we want all our checks to also +# use this option, otherwise we may end up with an inconsistency between +# the flags we think we require during configuration (if the checks are +# performed without -nodefaultlibs) and the flags that are actually +# required during compilation (which has the -nodefaultlibs). libc is +# required for the link to go through. We remove sanitizers from the +# configuration checks to avoid spurious link errors. +check_c_compiler_flag(-nodefaultlibs LIBUNWIND_HAS_NODEFAULTLIBS_FLAG) +if (LIBUNWIND_HAS_NODEFAULTLIBS_FLAG) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nodefaultlibs") + if (LIBUNWIND_HAS_C_LIB) + list(APPEND CMAKE_REQUIRED_LIBRARIES c) + endif () + if (LIBUNWIND_USE_COMPILER_RT) + find_compiler_rt_library(builtins LIBUNWIND_BUILTINS_LIBRARY) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${LIBUNWIND_BUILTINS_LIBRARY}") + else () + if (LIBUNWIND_HAS_GCC_S_LIB) + list(APPEND CMAKE_REQUIRED_LIBRARIES gcc_s) + endif () + if (LIBUNWIND_HAS_GCC_LIB) + list(APPEND CMAKE_REQUIRED_LIBRARIES gcc) + endif () + endif () + if (MINGW) + # Mingw64 requires quite a few "C" runtime libraries in order for basic + # programs to link successfully with -nodefaultlibs. 
+ if (LIBUNWIND_USE_COMPILER_RT) + set(MINGW_RUNTIME ${LIBUNWIND_BUILTINS_LIBRARY}) + else () + set(MINGW_RUNTIME gcc_s gcc) + endif() + set(MINGW_LIBRARIES mingw32 ${MINGW_RUNTIME} moldname mingwex msvcrt advapi32 + shell32 user32 kernel32 mingw32 ${MINGW_RUNTIME} + moldname mingwex msvcrt) + list(APPEND CMAKE_REQUIRED_LIBRARIES ${MINGW_LIBRARIES}) + endif() + if (CMAKE_C_FLAGS MATCHES -fsanitize OR CMAKE_CXX_FLAGS MATCHES -fsanitize) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -fno-sanitize=all") + endif () + if (CMAKE_C_FLAGS MATCHES -fsanitize-coverage OR CMAKE_CXX_FLAGS MATCHES -fsanitize-coverage) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters") + endif () +endif () + +# Check compiler pragmas +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + cmake_push_check_state() + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") + check_c_source_compiles(" +#pragma comment(lib, \"c\") +int main() { return 0; } +" LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) + cmake_pop_check_state() +endif() + +# Check compiler flags +check_c_compiler_flag(-funwind-tables LIBUNWIND_HAS_FUNWIND_TABLES) +check_cxx_compiler_flag(-fno-exceptions LIBUNWIND_HAS_NO_EXCEPTIONS_FLAG) +check_cxx_compiler_flag(-fno-rtti LIBUNWIND_HAS_NO_RTTI_FLAG) +check_cxx_compiler_flag(-fstrict-aliasing LIBUNWIND_HAS_FSTRICT_ALIASING_FLAG) +check_cxx_compiler_flag(-nostdinc++ LIBUNWIND_HAS_NOSTDINCXX_FLAG) +check_cxx_compiler_flag(-Wall LIBUNWIND_HAS_WALL_FLAG) +check_cxx_compiler_flag(-W LIBUNWIND_HAS_W_FLAG) +check_cxx_compiler_flag(-Wno-unused-function LIBUNWIND_HAS_WNO_UNUSED_FUNCTION_FLAG) +check_cxx_compiler_flag(-Wunused-variable LIBUNWIND_HAS_WUNUSED_VARIABLE_FLAG) +check_cxx_compiler_flag(-Wunused-parameter LIBUNWIND_HAS_WUNUSED_PARAMETER_FLAG) +check_cxx_compiler_flag(-Wstrict-aliasing LIBUNWIND_HAS_WSTRICT_ALIASING_FLAG) +check_cxx_compiler_flag(-Wstrict-overflow LIBUNWIND_HAS_WSTRICT_OVERFLOW_FLAG) +check_cxx_compiler_flag(-Wwrite-strings LIBUNWIND_HAS_WWRITE_STRINGS_FLAG) +check_cxx_compiler_flag(-Wchar-subscripts LIBUNWIND_HAS_WCHAR_SUBSCRIPTS_FLAG) +check_cxx_compiler_flag(-Wmismatched-tags LIBUNWIND_HAS_WMISMATCHED_TAGS_FLAG) +check_cxx_compiler_flag(-Wmissing-braces LIBUNWIND_HAS_WMISSING_BRACES_FLAG) +check_cxx_compiler_flag(-Wshorten-64-to-32 LIBUNWIND_HAS_WSHORTEN_64_TO_32_FLAG) +check_cxx_compiler_flag(-Wsign-conversion LIBUNWIND_HAS_WSIGN_CONVERSION_FLAG) +check_cxx_compiler_flag(-Wsign-compare LIBUNWIND_HAS_WSIGN_COMPARE_FLAG) +check_cxx_compiler_flag(-Wshadow LIBUNWIND_HAS_WSHADOW_FLAG) +check_cxx_compiler_flag(-Wconversion LIBUNWIND_HAS_WCONVERSION_FLAG) +check_cxx_compiler_flag(-Wnewline-eof LIBUNWIND_HAS_WNEWLINE_EOF_FLAG) +check_cxx_compiler_flag(-Wundef LIBUNWIND_HAS_WUNDEF_FLAG) +check_cxx_compiler_flag(-pedantic LIBUNWIND_HAS_PEDANTIC_FLAG) +check_cxx_compiler_flag(-Werror LIBUNWIND_HAS_WERROR_FLAG) +check_cxx_compiler_flag(-Wno-error LIBUNWIND_HAS_WNO_ERROR_FLAG) +check_cxx_compiler_flag(/WX LIBUNWIND_HAS_WX_FLAG) +check_cxx_compiler_flag(/WX- LIBUNWIND_HAS_NO_WX_FLAG) +check_cxx_compiler_flag(/EHsc LIBUNWIND_HAS_EHSC_FLAG) +check_cxx_compiler_flag(/EHs- LIBUNWIND_HAS_NO_EHS_FLAG) +check_cxx_compiler_flag(/EHa- LIBUNWIND_HAS_NO_EHA_FLAG) +check_cxx_compiler_flag(/GR- LIBUNWIND_HAS_NO_GR_FLAG) +check_cxx_compiler_flag(-std=c++11 LIBUNWIND_HAS_STD_CXX11) + +if(LIBUNWIND_HAS_STD_CXX11) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +endif() + +check_library_exists(dl dladdr "" LIBUNWIND_HAS_DL_LIB) 
+check_library_exists(pthread pthread_once "" LIBUNWIND_HAS_PTHREAD_LIB) diff --git a/src/coreclr/src/nativeaot/libunwind/docs/BuildingLibunwind.rst b/src/coreclr/src/nativeaot/libunwind/docs/BuildingLibunwind.rst new file mode 100644 index 0000000000000..7f42133a8a50e --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/docs/BuildingLibunwind.rst @@ -0,0 +1,161 @@ +.. _BuildingLibunwind: + +================== +Building libunwind +================== + +.. contents:: + :local: + +.. _build instructions: + +Getting Started +=============== + +On Mac OS, the easiest way to get this library is to link with -lSystem. +However if you want to build tip-of-trunk from here (getting the bleeding +edge), read on. + +The basic steps needed to build libc++ are: + +#. Checkout LLVM, libunwind, and related projects: + + * ``cd where-you-want-llvm-to-live`` + * ``git clone https://github.com/llvm/llvm-project.git`` + +#. Configure and build libunwind: + + CMake is the only supported configuration system. + + Clang is the preferred compiler when building and using libunwind. + + * ``cd where you want to build llvm`` + * ``mkdir build`` + * ``cd build`` + * ``cmake -G -DLLVM_ENABLE_PROJECTS=libunwind [options] `` + + For more information about configuring libunwind see :ref:`CMake Options`. + + * ``make unwind`` --- will build libunwind. + * ``make check-unwind`` --- will run the test suite. + + Shared and static libraries for libunwind should now be present in llvm/build/lib. + +#. **Optional**: Install libunwind + + If your system already provides an unwinder, it is important to be careful + not to replace it. Remember Use the CMake option ``CMAKE_INSTALL_PREFIX`` to + select a safe place to install libunwind. + + * ``make install-unwind`` --- Will install the libraries and the headers + + +It is sometimes beneficial to build outside of the LLVM tree. An out-of-tree +build would look like this: + +.. code-block:: bash + + $ cd where-you-want-libunwind-to-live + $ # Check out llvm, and libunwind + $ ``svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm`` + $ ``svn co http://llvm.org/svn/llvm-project/libunwind/trunk libunwind`` + $ cd where-you-want-to-build + $ mkdir build && cd build + $ export CC=clang CXX=clang++ + $ cmake -DLLVM_PATH=path/to/llvm \ + path/to/libunwind + $ make + + +.. _CMake Options: + +CMake Options +============= + +Here are some of the CMake variables that are used often, along with a +brief explanation and LLVM-specific notes. For full documentation, check the +CMake docs or execute ``cmake --help-variable VARIABLE_NAME``. + +**CMAKE_BUILD_TYPE**:STRING + Sets the build type for ``make`` based generators. Possible values are + Release, Debug, RelWithDebInfo and MinSizeRel. On systems like Visual Studio + the user sets the build type with the IDE settings. + +**CMAKE_INSTALL_PREFIX**:PATH + Path where LLVM will be installed if "make install" is invoked or the + "INSTALL" target is built. + +**CMAKE_CXX_COMPILER**:STRING + The C++ compiler to use when building and testing libunwind. + + +.. _libunwind-specific options: + +libunwind specific options +-------------------------- + +.. option:: LIBUNWIND_BUILD_32_BITS:BOOL + + **Default**: Same as LLVM_BUILD_32_BITS + + Toggle whether libunwind should be built with -m32. + +.. option:: LIBUNWIND_ENABLE_ASSERTIONS:BOOL + + **Default**: ``ON`` + + Toggle assertions independent of the build mode. + +.. option:: LIBUNWIND_ENABLE_PEDANTIC:BOOL + + **Default**: ``ON`` + + Compile with -Wpedantic. + +.. 
option:: LIBUNWIND_ENABLE_WERROR:BOOL + + **Default**: ``ON`` + + Compile with -Werror + +.. option:: LIBUNWIND_ENABLE_SHARED:BOOL + + **Default**: ``ON`` + + Build libunwind as a shared library. + +.. option:: LIBUNWIND_ENABLE_STATIC:BOOL + + **Default**: ``ON`` + + Build libunwind as a static archive. + +.. option:: LIBUNWIND_ENABLE_CROSS_UNWINDING:BOOL + + **Default**: ``OFF`` + + Enable cross-platform unwinding support. + +.. option:: LIBUNWIND_ENABLE_ARM_WMMX:BOOL + + **Default**: ``OFF`` + + Enable unwinding support for ARM WMMX registers. + +.. option:: LIBUNWIND_ENABLE_THREADS:BOOL + + **Default**: ``ON`` + + Build libunwind with threading support. + +.. option:: LIBUNWIND_TARGET_TRIPLE:STRING + + Target triple for cross compiling + +.. option:: LIBUNWIND_GCC_TOOLCHAIN:PATH + + GCC toolchain for cross compiling + +.. option:: LIBUNWIND_SYSROOT + + Sysroot for cross compiling diff --git a/src/coreclr/src/nativeaot/libunwind/docs/CMakeLists.txt b/src/coreclr/src/nativeaot/libunwind/docs/CMakeLists.txt new file mode 100644 index 0000000000000..c226f2f5b8e8d --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/docs/CMakeLists.txt @@ -0,0 +1,7 @@ +include(FindSphinx) +if (SPHINX_FOUND) + include(AddSphinxTarget) + if (${SPHINX_OUTPUT_HTML}) + add_sphinx_target(html libunwind) + endif() +endif() diff --git a/src/coreclr/src/nativeaot/libunwind/docs/README.txt b/src/coreclr/src/nativeaot/libunwind/docs/README.txt new file mode 100644 index 0000000000000..968982fce5e07 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/docs/README.txt @@ -0,0 +1,13 @@ +libunwind Documentation +==================== + +The libunwind documentation is written using the Sphinx documentation generator. It is +currently tested with Sphinx 1.1.3. + +To build the documents into html configure libunwind with the following cmake options: + + * -DLLVM_ENABLE_SPHINX=ON + * -DLIBUNWIND_INCLUDE_DOCS=ON + +After configuring libunwind with these options the make rule `docs-libunwind-html` +should be available. diff --git a/src/coreclr/src/nativeaot/libunwind/docs/conf.py b/src/coreclr/src/nativeaot/libunwind/docs/conf.py new file mode 100644 index 0000000000000..704a1d0a12da4 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/docs/conf.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +# +# libunwind documentation build configuration file. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os +from datetime import date + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.intersphinx', 'sphinx.ext.todo'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. 
+source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'libunwind' +copyright = u'2011-%d, LLVM Project' % date.today().year + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '9.0' +# The full version, including alpha/beta/rc tags. +release = '9.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +today_fmt = '%Y-%m-%d' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +show_authors = True + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'friendly' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'haiku' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. 
+#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'libunwinddoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('contents', 'libunwind.tex', u'libunwind Documentation', + u'LLVM project', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('contents', 'libunwind', u'libunwind Documentation', + [u'LLVM project'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('contents', 'libunwind', u'libunwind Documentation', + u'LLVM project', 'libunwind', 'LLVM Unwinder', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + + +# FIXME: Define intersphinx configration. +intersphinx_mapping = {} + + +# -- Options for extensions ---------------------------------------------------- + +# Enable this if you want TODOs to show up in the generated documentation. 
+todo_include_todos = True diff --git a/src/coreclr/src/nativeaot/libunwind/docs/index.rst b/src/coreclr/src/nativeaot/libunwind/docs/index.rst new file mode 100644 index 0000000000000..a4e21bb3c336c --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/docs/index.rst @@ -0,0 +1,104 @@ +.. _index: + +======================= +libunwind LLVM Unwinder +======================= + +Overview +======== + +libunwind is an implementation of the interface defined by the HP libunwind +project. It was contributed by Apple as a way to enable clang++ to port to +platforms that do not have a system unwinder. It is intended to be a small and +fast implementation of the ABI, leaving off some features of HP's libunwind +that never materialized (e.g. remote unwinding). + +The unwinder has two levels of API. The high level APIs are the `_Unwind_*` +functions which implement functionality required by `__cxa_*` exception +functions. The low level APIs are the `unw_*` functions which are an interface +defined by the old HP libunwind project. + +Getting Started with libunwind +------------------------------ + +.. toctree:: + :maxdepth: 2 + + BuildingLibunwind + +Current Status +-------------- + +libunwind is a production-quality unwinder, with platform support for DWARF +unwind info, SjLj, and ARM EHABI. + +The low level libunwind API was designed to work either in-process (aka local) +or to operate on another process (aka remote), but only the local path has been +implemented. Remote unwinding remains as future work. + +Platform and Compiler Support +----------------------------- + +libunwind is known to work on the following platforms: + +============ ======================== ============ ======================== +OS Arch Compilers Unwind Info +============ ======================== ============ ======================== +Any i386, x86_64, ARM Clang SjLj +Bare Metal ARM Clang, GCC EHABI +FreeBSD i386, x86_64, ARM64 Clang DWARF CFI +iOS ARM Clang SjLj +Linux ARM Clang, GCC EHABI +Linux i386, x86_64, ARM64 Clang, GCC DWARF CFI +macOS i386, x86_64 Clang, GCC DWARF CFI +NetBSD x86_64 Clang, GCC DWARF CFI +Windows i386, x86_64, ARM, ARM64 Clang DWARF CFI +============ ======================== ============ ======================== + +The following minimum compiler versions are strongly recommended. + +* Clang 3.5 and above +* GCC 4.7 and above. + +Anything older *may* work. + +Notes and Known Issues +---------------------- + +* TODO + + +Getting Involved +================ + +First please review our `Developer's Policy `__ +and `Getting started with LLVM `__. + +**Bug Reports** + +If you think you've found a bug in libunwind, please report it using +the `LLVM Bugzilla`_. If you're not sure, you +can post a message to the `cfe-dev mailing list`_ or on IRC. +Please include "libunwind" in your subject. + +**Patches** + +If you want to contribute a patch to libunwind, the best place for that is +`Phabricator `_. Please include [libunwind] in the subject and +add `cfe-commits` as a subscriber. Also make sure you are subscribed to the +`cfe-commits mailing list `_. + +**Discussion and Questions** + +Send discussions and questions to the +`cfe-dev mailing list `_. +Please include [libunwind] in the subject. 
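**Example: walking the local stack**

The overview above mentions the low-level ``unw_*`` interface declared in
``include/libunwind.h``. As a quick orientation (this sketch is editorial and is
not part of the imported sources), a local backtrace using only the entry
points declared in that header looks roughly like this, assuming the program is
linked against the unwinder and ``libunwind.h`` is on the include path:

.. code-block:: c

    #include <libunwind.h>
    #include <stdio.h>

    /* Minimal local unwind: capture the current context, then step
       through caller frames, printing the IP and symbol of each one. */
    static void print_backtrace(void) {
      unw_context_t context;
      unw_cursor_t cursor;

      unw_getcontext(&context);           /* snapshot current registers    */
      unw_init_local(&cursor, &context);  /* in-process ("local") unwind   */

      while (unw_step(&cursor) > 0) {     /* returns 0 at the oldest frame */
        unw_word_t ip = 0, offset = 0;
        char name[256];

        unw_get_reg(&cursor, UNW_REG_IP, &ip);
        if (unw_get_proc_name(&cursor, name, sizeof(name), &offset) == UNW_ESUCCESS)
          printf("ip=%p  %s+0x%lx\n", (void *)ip, name, (unsigned long)offset);
        else
          printf("ip=%p  <unknown>\n", (void *)ip);
      }
    }

The functions and constants used here (``unw_getcontext``, ``unw_init_local``,
``unw_step``, ``unw_get_reg``, ``unw_get_proc_name``, ``UNW_REG_IP``,
``UNW_ESUCCESS``) are the ones declared in ``include/libunwind.h`` in this
change; everything else (the helper name, buffer size, and output format) is an
arbitrary choice for illustration.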
+ + +Quick Links +=========== +* `LLVM Homepage `_ +* `LLVM Bugzilla `_ +* `cfe-commits Mailing List`_ +* `cfe-dev Mailing List`_ +* `Browse libunwind Sources `_ diff --git a/src/coreclr/src/nativeaot/libunwind/include/__libunwind_config.h b/src/coreclr/src/nativeaot/libunwind/include/__libunwind_config.h new file mode 100644 index 0000000000000..753085c7fe0c0 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/include/__libunwind_config.h @@ -0,0 +1,145 @@ +//===------------------------- __libunwind_config.h -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef ____LIBUNWIND_CONFIG_H__ +#define ____LIBUNWIND_CONFIG_H__ + +#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ + !defined(__ARM_DWARF_EH__) +#define _LIBUNWIND_ARM_EHABI +#endif + +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86 8 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64 32 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC 112 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC64 116 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64 95 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM 287 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_OR1K 32 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_MIPS 65 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_SPARC 31 + +#if defined(_LIBUNWIND_IS_NATIVE_ONLY) +# if defined(__i386__) +# define _LIBUNWIND_TARGET_I386 +# define _LIBUNWIND_CONTEXT_SIZE 13 +# define _LIBUNWIND_CURSOR_SIZE 19 +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86 +# elif defined(__x86_64__) +# define _LIBUNWIND_TARGET_X86_64 1 +# if defined(_WIN64) +# define _LIBUNWIND_CONTEXT_SIZE 54 +# ifdef __SEH__ +# define _LIBUNWIND_CURSOR_SIZE 204 +# else +# define _LIBUNWIND_CURSOR_SIZE 66 +# endif +# else +# define _LIBUNWIND_CONTEXT_SIZE 38 +# define _LIBUNWIND_CURSOR_SIZE 50 +# endif +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64 +# elif defined(__powerpc64__) +# define _LIBUNWIND_TARGET_PPC64 1 +# define _LIBUNWIND_CONTEXT_SIZE 167 +# define _LIBUNWIND_CURSOR_SIZE 179 +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC64 +# elif defined(__ppc__) +# define _LIBUNWIND_TARGET_PPC 1 +# define _LIBUNWIND_CONTEXT_SIZE 117 +# define _LIBUNWIND_CURSOR_SIZE 124 +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC +# elif defined(__aarch64__) +# define _LIBUNWIND_TARGET_AARCH64 1 +# define _LIBUNWIND_CONTEXT_SIZE 100 +# if defined(__SEH__) +# define _LIBUNWIND_CURSOR_SIZE 198 +# else +# define _LIBUNWIND_CURSOR_SIZE 112 +# endif +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64 +# elif defined(__arm__) +# define _LIBUNWIND_TARGET_ARM 1 +# if defined(__SEH__) +# define _LIBUNWIND_CONTEXT_SIZE 42 +# define _LIBUNWIND_CURSOR_SIZE 80 +# elif defined(__ARM_WMMX) +# define _LIBUNWIND_CONTEXT_SIZE 61 +# define _LIBUNWIND_CURSOR_SIZE 68 +# else +# define _LIBUNWIND_CONTEXT_SIZE 50 +# define _LIBUNWIND_CURSOR_SIZE 57 +# endif +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM +# elif defined(__or1k__) +# define _LIBUNWIND_TARGET_OR1K 1 +# define _LIBUNWIND_CONTEXT_SIZE 16 +# define _LIBUNWIND_CURSOR_SIZE 24 +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER 
_LIBUNWIND_HIGHEST_DWARF_REGISTER_OR1K +# elif defined(__mips__) +# if defined(_ABIO32) && _MIPS_SIM == _ABIO32 +# define _LIBUNWIND_TARGET_MIPS_O32 1 +# if defined(__mips_hard_float) +# define _LIBUNWIND_CONTEXT_SIZE 50 +# define _LIBUNWIND_CURSOR_SIZE 57 +# else +# define _LIBUNWIND_CONTEXT_SIZE 18 +# define _LIBUNWIND_CURSOR_SIZE 24 +# endif +# elif defined(_ABIN32) && _MIPS_SIM == _ABIN32 +# define _LIBUNWIND_TARGET_MIPS_NEWABI 1 +# if defined(__mips_hard_float) +# define _LIBUNWIND_CONTEXT_SIZE 67 +# define _LIBUNWIND_CURSOR_SIZE 74 +# else +# define _LIBUNWIND_CONTEXT_SIZE 35 +# define _LIBUNWIND_CURSOR_SIZE 42 +# endif +# elif defined(_ABI64) && _MIPS_SIM == _ABI64 +# define _LIBUNWIND_TARGET_MIPS_NEWABI 1 +# if defined(__mips_hard_float) +# define _LIBUNWIND_CONTEXT_SIZE 67 +# define _LIBUNWIND_CURSOR_SIZE 79 +# else +# define _LIBUNWIND_CONTEXT_SIZE 35 +# define _LIBUNWIND_CURSOR_SIZE 47 +# endif +# else +# error "Unsupported MIPS ABI and/or environment" +# endif +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_MIPS +# elif defined(__sparc__) + #define _LIBUNWIND_TARGET_SPARC 1 + #define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_SPARC + #define _LIBUNWIND_CONTEXT_SIZE 16 + #define _LIBUNWIND_CURSOR_SIZE 23 +#elif defined(HOST_WASM) +#define _LIBUNWIND_TARGET_WASM 1 +// TODO: Determine the right values +#define _LIBUNWIND_CONTEXT_SIZE 0xbadf00d +#define _LIBUNWIND_CURSOR_SIZE 0xbadf00d +#else +# error "Unsupported architecture." +# endif +#else // !_LIBUNWIND_IS_NATIVE_ONLY +# define _LIBUNWIND_TARGET_I386 +# define _LIBUNWIND_TARGET_X86_64 1 +# define _LIBUNWIND_TARGET_PPC 1 +# define _LIBUNWIND_TARGET_PPC64 1 +# define _LIBUNWIND_TARGET_AARCH64 1 +# define _LIBUNWIND_TARGET_ARM 1 +# define _LIBUNWIND_TARGET_OR1K 1 +# define _LIBUNWIND_TARGET_MIPS_O32 1 +# define _LIBUNWIND_TARGET_MIPS_NEWABI 1 +# define _LIBUNWIND_TARGET_SPARC 1 +# define _LIBUNWIND_CONTEXT_SIZE 167 +# define _LIBUNWIND_CURSOR_SIZE 179 +# define _LIBUNWIND_HIGHEST_DWARF_REGISTER 287 +#endif // _LIBUNWIND_IS_NATIVE_ONLY + +#endif // ____LIBUNWIND_CONFIG_H__ diff --git a/src/coreclr/src/nativeaot/libunwind/include/libunwind.h b/src/coreclr/src/nativeaot/libunwind/include/libunwind.h new file mode 100644 index 0000000000000..6e70f264f9f36 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/include/libunwind.h @@ -0,0 +1,855 @@ +//===---------------------------- libunwind.h -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Compatible with libunwind API documented at: +// http://www.nongnu.org/libunwind/man/libunwind(3).html +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBUNWIND__ +#define __LIBUNWIND__ + +#include <__libunwind_config.h> + +#include +#include + +#ifdef __APPLE__ + #if __clang__ + #if __has_include() + #include + #endif + #elif __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 + #include + #endif + + #ifdef __arm__ + #define LIBUNWIND_AVAIL __attribute__((unavailable)) + #elif defined(__OSX_AVAILABLE_STARTING) + #define LIBUNWIND_AVAIL __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_5_0) + #else + #include + #ifdef AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define LIBUNWIND_AVAIL AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #else + #define LIBUNWIND_AVAIL __attribute__((unavailable)) + #endif + #endif +#else + #define LIBUNWIND_AVAIL +#endif + +/* error codes */ +enum { + UNW_ESUCCESS = 0, /* no error */ + UNW_EUNSPEC = -6540, /* unspecified (general) error */ + UNW_ENOMEM = -6541, /* out of memory */ + UNW_EBADREG = -6542, /* bad register number */ + UNW_EREADONLYREG = -6543, /* attempt to write read-only register */ + UNW_ESTOPUNWIND = -6544, /* stop unwinding */ + UNW_EINVALIDIP = -6545, /* invalid IP */ + UNW_EBADFRAME = -6546, /* bad frame */ + UNW_EINVAL = -6547, /* unsupported operation or bad value */ + UNW_EBADVERSION = -6548, /* unwind info has unsupported version */ + UNW_ENOINFO = -6549 /* no unwind info found */ +#if defined(_LIBUNWIND_TARGET_AARCH64) && !defined(_LIBUNWIND_IS_NATIVE_ONLY) + , UNW_ECROSSRASIGNING = -6550 /* cross unwind with return address signing */ +#endif +}; + +struct unw_context_t { + uint64_t data[_LIBUNWIND_CONTEXT_SIZE]; +}; +typedef struct unw_context_t unw_context_t; + +struct unw_cursor_t { + uint64_t data[_LIBUNWIND_CURSOR_SIZE]; +}; +typedef struct unw_cursor_t unw_cursor_t; + +typedef struct unw_addr_space *unw_addr_space_t; + +typedef int unw_regnum_t; +typedef uintptr_t unw_word_t; +#if defined(__arm__) && !defined(__ARM_DWARF_EH__) +typedef uint64_t unw_fpreg_t; +#else +typedef double unw_fpreg_t; +#endif + +struct unw_proc_info_t { + unw_word_t start_ip; /* start address of function */ + unw_word_t end_ip; /* address after end of function */ + unw_word_t lsda; /* address of language specific data area, */ + /* or zero if not used */ + unw_word_t handler; /* personality routine, or zero if not used */ + unw_word_t gp; /* not used */ + unw_word_t flags; /* not used */ + uint32_t format; /* compact unwind encoding, or zero if none */ + uint32_t unwind_info_size; /* size of DWARF unwind info, or zero if none */ + unw_word_t unwind_info; /* address of DWARF unwind info, or zero */ + unw_word_t extra; /* mach_header of mach-o image containing func */ +}; +typedef struct unw_proc_info_t unw_proc_info_t; + +enum unw_save_loc_type_t +{ + UNW_SLT_NONE, /* register is not saved ("not an l-value") */ + UNW_SLT_MEMORY, /* register has been saved in memory */ + UNW_SLT_REG /* register has been saved in (another) register */ +}; +typedef enum unw_save_loc_type_t unw_save_loc_type_t; + +struct unw_save_loc_t +{ + unw_save_loc_type_t type; + union + { + unw_word_t addr; /* valid if type==UNW_SLT_MEMORY */ + unw_regnum_t regnum; /* valid if type==UNW_SLT_REG */ + } + u; +}; +typedef struct unw_save_loc_t unw_save_loc_t; + +#ifdef __cplusplus +extern "C" { +#endif + +extern int unw_getcontext(unw_context_t *) LIBUNWIND_AVAIL; +extern int 
unw_init_local(unw_cursor_t *, unw_context_t *) LIBUNWIND_AVAIL; +extern int unw_step(unw_cursor_t *) LIBUNWIND_AVAIL; +extern int unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *) LIBUNWIND_AVAIL; +extern int unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *) LIBUNWIND_AVAIL; +extern int unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t, unw_word_t *) LIBUNWIND_AVAIL; +extern int unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t) LIBUNWIND_AVAIL; +extern int unw_resume(unw_cursor_t *) LIBUNWIND_AVAIL; + +#ifdef __arm__ +/* Save VFP registers in FSTMX format (instead of FSTMD). */ +extern void unw_save_vfp_as_X(unw_cursor_t *) LIBUNWIND_AVAIL; +#endif + + +extern const char *unw_regname(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL; +extern int unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *) LIBUNWIND_AVAIL; +extern int unw_is_fpreg(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL; +extern int unw_is_signal_frame(unw_cursor_t *) LIBUNWIND_AVAIL; +extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *) LIBUNWIND_AVAIL; +extern int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*) LIBUNWIND_AVAIL; + +extern unw_addr_space_t unw_local_addr_space; + +#ifdef __cplusplus +} +#endif + +// architecture independent register numbers +enum { + UNW_REG_IP = -1, // instruction pointer + UNW_REG_SP = -2, // stack pointer +}; + +// 32-bit x86 registers +enum { + UNW_X86_EAX = 0, + UNW_X86_ECX = 1, + UNW_X86_EDX = 2, + UNW_X86_EBX = 3, + UNW_X86_EBP = 4, + UNW_X86_ESP = 5, + UNW_X86_ESI = 6, + UNW_X86_EDI = 7 +}; + +// 64-bit x86_64 registers +enum { + UNW_X86_64_RAX = 0, + UNW_X86_64_RDX = 1, + UNW_X86_64_RCX = 2, + UNW_X86_64_RBX = 3, + UNW_X86_64_RSI = 4, + UNW_X86_64_RDI = 5, + UNW_X86_64_RBP = 6, + UNW_X86_64_RSP = 7, + UNW_X86_64_R8 = 8, + UNW_X86_64_R9 = 9, + UNW_X86_64_R10 = 10, + UNW_X86_64_R11 = 11, + UNW_X86_64_R12 = 12, + UNW_X86_64_R13 = 13, + UNW_X86_64_R14 = 14, + UNW_X86_64_R15 = 15, + UNW_X86_64_RIP = 16, + UNW_X86_64_XMM0 = 17, + UNW_X86_64_XMM1 = 18, + UNW_X86_64_XMM2 = 19, + UNW_X86_64_XMM3 = 20, + UNW_X86_64_XMM4 = 21, + UNW_X86_64_XMM5 = 22, + UNW_X86_64_XMM6 = 23, + UNW_X86_64_XMM7 = 24, + UNW_X86_64_XMM8 = 25, + UNW_X86_64_XMM9 = 26, + UNW_X86_64_XMM10 = 27, + UNW_X86_64_XMM11 = 28, + UNW_X86_64_XMM12 = 29, + UNW_X86_64_XMM13 = 30, + UNW_X86_64_XMM14 = 31, + UNW_X86_64_XMM15 = 32, +}; + + +// 32-bit ppc register numbers +enum { + UNW_PPC_R0 = 0, + UNW_PPC_R1 = 1, + UNW_PPC_R2 = 2, + UNW_PPC_R3 = 3, + UNW_PPC_R4 = 4, + UNW_PPC_R5 = 5, + UNW_PPC_R6 = 6, + UNW_PPC_R7 = 7, + UNW_PPC_R8 = 8, + UNW_PPC_R9 = 9, + UNW_PPC_R10 = 10, + UNW_PPC_R11 = 11, + UNW_PPC_R12 = 12, + UNW_PPC_R13 = 13, + UNW_PPC_R14 = 14, + UNW_PPC_R15 = 15, + UNW_PPC_R16 = 16, + UNW_PPC_R17 = 17, + UNW_PPC_R18 = 18, + UNW_PPC_R19 = 19, + UNW_PPC_R20 = 20, + UNW_PPC_R21 = 21, + UNW_PPC_R22 = 22, + UNW_PPC_R23 = 23, + UNW_PPC_R24 = 24, + UNW_PPC_R25 = 25, + UNW_PPC_R26 = 26, + UNW_PPC_R27 = 27, + UNW_PPC_R28 = 28, + UNW_PPC_R29 = 29, + UNW_PPC_R30 = 30, + UNW_PPC_R31 = 31, + UNW_PPC_F0 = 32, + UNW_PPC_F1 = 33, + UNW_PPC_F2 = 34, + UNW_PPC_F3 = 35, + UNW_PPC_F4 = 36, + UNW_PPC_F5 = 37, + UNW_PPC_F6 = 38, + UNW_PPC_F7 = 39, + UNW_PPC_F8 = 40, + UNW_PPC_F9 = 41, + UNW_PPC_F10 = 42, + UNW_PPC_F11 = 43, + UNW_PPC_F12 = 44, + UNW_PPC_F13 = 45, + UNW_PPC_F14 = 46, + UNW_PPC_F15 = 47, + UNW_PPC_F16 = 48, + UNW_PPC_F17 = 49, + UNW_PPC_F18 = 50, + UNW_PPC_F19 = 51, + UNW_PPC_F20 = 52, + UNW_PPC_F21 = 53, + UNW_PPC_F22 = 54, + UNW_PPC_F23 = 55, + UNW_PPC_F24 = 56, + UNW_PPC_F25 = 
57, + UNW_PPC_F26 = 58, + UNW_PPC_F27 = 59, + UNW_PPC_F28 = 60, + UNW_PPC_F29 = 61, + UNW_PPC_F30 = 62, + UNW_PPC_F31 = 63, + UNW_PPC_MQ = 64, + UNW_PPC_LR = 65, + UNW_PPC_CTR = 66, + UNW_PPC_AP = 67, + UNW_PPC_CR0 = 68, + UNW_PPC_CR1 = 69, + UNW_PPC_CR2 = 70, + UNW_PPC_CR3 = 71, + UNW_PPC_CR4 = 72, + UNW_PPC_CR5 = 73, + UNW_PPC_CR6 = 74, + UNW_PPC_CR7 = 75, + UNW_PPC_XER = 76, + UNW_PPC_V0 = 77, + UNW_PPC_V1 = 78, + UNW_PPC_V2 = 79, + UNW_PPC_V3 = 80, + UNW_PPC_V4 = 81, + UNW_PPC_V5 = 82, + UNW_PPC_V6 = 83, + UNW_PPC_V7 = 84, + UNW_PPC_V8 = 85, + UNW_PPC_V9 = 86, + UNW_PPC_V10 = 87, + UNW_PPC_V11 = 88, + UNW_PPC_V12 = 89, + UNW_PPC_V13 = 90, + UNW_PPC_V14 = 91, + UNW_PPC_V15 = 92, + UNW_PPC_V16 = 93, + UNW_PPC_V17 = 94, + UNW_PPC_V18 = 95, + UNW_PPC_V19 = 96, + UNW_PPC_V20 = 97, + UNW_PPC_V21 = 98, + UNW_PPC_V22 = 99, + UNW_PPC_V23 = 100, + UNW_PPC_V24 = 101, + UNW_PPC_V25 = 102, + UNW_PPC_V26 = 103, + UNW_PPC_V27 = 104, + UNW_PPC_V28 = 105, + UNW_PPC_V29 = 106, + UNW_PPC_V30 = 107, + UNW_PPC_V31 = 108, + UNW_PPC_VRSAVE = 109, + UNW_PPC_VSCR = 110, + UNW_PPC_SPE_ACC = 111, + UNW_PPC_SPEFSCR = 112 +}; + +// 64-bit ppc register numbers +enum { + UNW_PPC64_R0 = 0, + UNW_PPC64_R1 = 1, + UNW_PPC64_R2 = 2, + UNW_PPC64_R3 = 3, + UNW_PPC64_R4 = 4, + UNW_PPC64_R5 = 5, + UNW_PPC64_R6 = 6, + UNW_PPC64_R7 = 7, + UNW_PPC64_R8 = 8, + UNW_PPC64_R9 = 9, + UNW_PPC64_R10 = 10, + UNW_PPC64_R11 = 11, + UNW_PPC64_R12 = 12, + UNW_PPC64_R13 = 13, + UNW_PPC64_R14 = 14, + UNW_PPC64_R15 = 15, + UNW_PPC64_R16 = 16, + UNW_PPC64_R17 = 17, + UNW_PPC64_R18 = 18, + UNW_PPC64_R19 = 19, + UNW_PPC64_R20 = 20, + UNW_PPC64_R21 = 21, + UNW_PPC64_R22 = 22, + UNW_PPC64_R23 = 23, + UNW_PPC64_R24 = 24, + UNW_PPC64_R25 = 25, + UNW_PPC64_R26 = 26, + UNW_PPC64_R27 = 27, + UNW_PPC64_R28 = 28, + UNW_PPC64_R29 = 29, + UNW_PPC64_R30 = 30, + UNW_PPC64_R31 = 31, + UNW_PPC64_F0 = 32, + UNW_PPC64_F1 = 33, + UNW_PPC64_F2 = 34, + UNW_PPC64_F3 = 35, + UNW_PPC64_F4 = 36, + UNW_PPC64_F5 = 37, + UNW_PPC64_F6 = 38, + UNW_PPC64_F7 = 39, + UNW_PPC64_F8 = 40, + UNW_PPC64_F9 = 41, + UNW_PPC64_F10 = 42, + UNW_PPC64_F11 = 43, + UNW_PPC64_F12 = 44, + UNW_PPC64_F13 = 45, + UNW_PPC64_F14 = 46, + UNW_PPC64_F15 = 47, + UNW_PPC64_F16 = 48, + UNW_PPC64_F17 = 49, + UNW_PPC64_F18 = 50, + UNW_PPC64_F19 = 51, + UNW_PPC64_F20 = 52, + UNW_PPC64_F21 = 53, + UNW_PPC64_F22 = 54, + UNW_PPC64_F23 = 55, + UNW_PPC64_F24 = 56, + UNW_PPC64_F25 = 57, + UNW_PPC64_F26 = 58, + UNW_PPC64_F27 = 59, + UNW_PPC64_F28 = 60, + UNW_PPC64_F29 = 61, + UNW_PPC64_F30 = 62, + UNW_PPC64_F31 = 63, + // 64: reserved + UNW_PPC64_LR = 65, + UNW_PPC64_CTR = 66, + // 67: reserved + UNW_PPC64_CR0 = 68, + UNW_PPC64_CR1 = 69, + UNW_PPC64_CR2 = 70, + UNW_PPC64_CR3 = 71, + UNW_PPC64_CR4 = 72, + UNW_PPC64_CR5 = 73, + UNW_PPC64_CR6 = 74, + UNW_PPC64_CR7 = 75, + UNW_PPC64_XER = 76, + UNW_PPC64_V0 = 77, + UNW_PPC64_V1 = 78, + UNW_PPC64_V2 = 79, + UNW_PPC64_V3 = 80, + UNW_PPC64_V4 = 81, + UNW_PPC64_V5 = 82, + UNW_PPC64_V6 = 83, + UNW_PPC64_V7 = 84, + UNW_PPC64_V8 = 85, + UNW_PPC64_V9 = 86, + UNW_PPC64_V10 = 87, + UNW_PPC64_V11 = 88, + UNW_PPC64_V12 = 89, + UNW_PPC64_V13 = 90, + UNW_PPC64_V14 = 91, + UNW_PPC64_V15 = 92, + UNW_PPC64_V16 = 93, + UNW_PPC64_V17 = 94, + UNW_PPC64_V18 = 95, + UNW_PPC64_V19 = 96, + UNW_PPC64_V20 = 97, + UNW_PPC64_V21 = 98, + UNW_PPC64_V22 = 99, + UNW_PPC64_V23 = 100, + UNW_PPC64_V24 = 101, + UNW_PPC64_V25 = 102, + UNW_PPC64_V26 = 103, + UNW_PPC64_V27 = 104, + UNW_PPC64_V28 = 105, + UNW_PPC64_V29 = 106, + UNW_PPC64_V30 = 107, + UNW_PPC64_V31 = 108, + // 109, 111-113: OpenPOWER ELF V2 
ABI: reserved + // Borrowing VRSAVE number from PPC32. + UNW_PPC64_VRSAVE = 109, + UNW_PPC64_VSCR = 110, + UNW_PPC64_TFHAR = 114, + UNW_PPC64_TFIAR = 115, + UNW_PPC64_TEXASR = 116, + UNW_PPC64_VS0 = UNW_PPC64_F0, + UNW_PPC64_VS1 = UNW_PPC64_F1, + UNW_PPC64_VS2 = UNW_PPC64_F2, + UNW_PPC64_VS3 = UNW_PPC64_F3, + UNW_PPC64_VS4 = UNW_PPC64_F4, + UNW_PPC64_VS5 = UNW_PPC64_F5, + UNW_PPC64_VS6 = UNW_PPC64_F6, + UNW_PPC64_VS7 = UNW_PPC64_F7, + UNW_PPC64_VS8 = UNW_PPC64_F8, + UNW_PPC64_VS9 = UNW_PPC64_F9, + UNW_PPC64_VS10 = UNW_PPC64_F10, + UNW_PPC64_VS11 = UNW_PPC64_F11, + UNW_PPC64_VS12 = UNW_PPC64_F12, + UNW_PPC64_VS13 = UNW_PPC64_F13, + UNW_PPC64_VS14 = UNW_PPC64_F14, + UNW_PPC64_VS15 = UNW_PPC64_F15, + UNW_PPC64_VS16 = UNW_PPC64_F16, + UNW_PPC64_VS17 = UNW_PPC64_F17, + UNW_PPC64_VS18 = UNW_PPC64_F18, + UNW_PPC64_VS19 = UNW_PPC64_F19, + UNW_PPC64_VS20 = UNW_PPC64_F20, + UNW_PPC64_VS21 = UNW_PPC64_F21, + UNW_PPC64_VS22 = UNW_PPC64_F22, + UNW_PPC64_VS23 = UNW_PPC64_F23, + UNW_PPC64_VS24 = UNW_PPC64_F24, + UNW_PPC64_VS25 = UNW_PPC64_F25, + UNW_PPC64_VS26 = UNW_PPC64_F26, + UNW_PPC64_VS27 = UNW_PPC64_F27, + UNW_PPC64_VS28 = UNW_PPC64_F28, + UNW_PPC64_VS29 = UNW_PPC64_F29, + UNW_PPC64_VS30 = UNW_PPC64_F30, + UNW_PPC64_VS31 = UNW_PPC64_F31, + UNW_PPC64_VS32 = UNW_PPC64_V0, + UNW_PPC64_VS33 = UNW_PPC64_V1, + UNW_PPC64_VS34 = UNW_PPC64_V2, + UNW_PPC64_VS35 = UNW_PPC64_V3, + UNW_PPC64_VS36 = UNW_PPC64_V4, + UNW_PPC64_VS37 = UNW_PPC64_V5, + UNW_PPC64_VS38 = UNW_PPC64_V6, + UNW_PPC64_VS39 = UNW_PPC64_V7, + UNW_PPC64_VS40 = UNW_PPC64_V8, + UNW_PPC64_VS41 = UNW_PPC64_V9, + UNW_PPC64_VS42 = UNW_PPC64_V10, + UNW_PPC64_VS43 = UNW_PPC64_V11, + UNW_PPC64_VS44 = UNW_PPC64_V12, + UNW_PPC64_VS45 = UNW_PPC64_V13, + UNW_PPC64_VS46 = UNW_PPC64_V14, + UNW_PPC64_VS47 = UNW_PPC64_V15, + UNW_PPC64_VS48 = UNW_PPC64_V16, + UNW_PPC64_VS49 = UNW_PPC64_V17, + UNW_PPC64_VS50 = UNW_PPC64_V18, + UNW_PPC64_VS51 = UNW_PPC64_V19, + UNW_PPC64_VS52 = UNW_PPC64_V20, + UNW_PPC64_VS53 = UNW_PPC64_V21, + UNW_PPC64_VS54 = UNW_PPC64_V22, + UNW_PPC64_VS55 = UNW_PPC64_V23, + UNW_PPC64_VS56 = UNW_PPC64_V24, + UNW_PPC64_VS57 = UNW_PPC64_V25, + UNW_PPC64_VS58 = UNW_PPC64_V26, + UNW_PPC64_VS59 = UNW_PPC64_V27, + UNW_PPC64_VS60 = UNW_PPC64_V28, + UNW_PPC64_VS61 = UNW_PPC64_V29, + UNW_PPC64_VS62 = UNW_PPC64_V30, + UNW_PPC64_VS63 = UNW_PPC64_V31 +}; + +// 64-bit ARM64 registers +enum { + UNW_ARM64_X0 = 0, + UNW_ARM64_X1 = 1, + UNW_ARM64_X2 = 2, + UNW_ARM64_X3 = 3, + UNW_ARM64_X4 = 4, + UNW_ARM64_X5 = 5, + UNW_ARM64_X6 = 6, + UNW_ARM64_X7 = 7, + UNW_ARM64_X8 = 8, + UNW_ARM64_X9 = 9, + UNW_ARM64_X10 = 10, + UNW_ARM64_X11 = 11, + UNW_ARM64_X12 = 12, + UNW_ARM64_X13 = 13, + UNW_ARM64_X14 = 14, + UNW_ARM64_X15 = 15, + UNW_ARM64_X16 = 16, + UNW_ARM64_X17 = 17, + UNW_ARM64_X18 = 18, + UNW_ARM64_X19 = 19, + UNW_ARM64_X20 = 20, + UNW_ARM64_X21 = 21, + UNW_ARM64_X22 = 22, + UNW_ARM64_X23 = 23, + UNW_ARM64_X24 = 24, + UNW_ARM64_X25 = 25, + UNW_ARM64_X26 = 26, + UNW_ARM64_X27 = 27, + UNW_ARM64_X28 = 28, + UNW_ARM64_X29 = 29, + UNW_ARM64_FP = 29, + UNW_ARM64_X30 = 30, + UNW_ARM64_LR = 30, + UNW_ARM64_X31 = 31, + UNW_ARM64_SP = 31, + // reserved block + UNW_ARM64_RA_SIGN_STATE = 34, + // reserved block + UNW_ARM64_D0 = 64, + UNW_ARM64_D1 = 65, + UNW_ARM64_D2 = 66, + UNW_ARM64_D3 = 67, + UNW_ARM64_D4 = 68, + UNW_ARM64_D5 = 69, + UNW_ARM64_D6 = 70, + UNW_ARM64_D7 = 71, + UNW_ARM64_D8 = 72, + UNW_ARM64_D9 = 73, + UNW_ARM64_D10 = 74, + UNW_ARM64_D11 = 75, + UNW_ARM64_D12 = 76, + UNW_ARM64_D13 = 77, + UNW_ARM64_D14 = 78, + UNW_ARM64_D15 = 79, + UNW_ARM64_D16 = 80, + 
UNW_ARM64_D17 = 81, + UNW_ARM64_D18 = 82, + UNW_ARM64_D19 = 83, + UNW_ARM64_D20 = 84, + UNW_ARM64_D21 = 85, + UNW_ARM64_D22 = 86, + UNW_ARM64_D23 = 87, + UNW_ARM64_D24 = 88, + UNW_ARM64_D25 = 89, + UNW_ARM64_D26 = 90, + UNW_ARM64_D27 = 91, + UNW_ARM64_D28 = 92, + UNW_ARM64_D29 = 93, + UNW_ARM64_D30 = 94, + UNW_ARM64_D31 = 95, +}; + +// 32-bit ARM registers. Numbers match DWARF for ARM spec #3.1 Table 1. +// Naming scheme uses recommendations given in Note 4 for VFP-v2 and VFP-v3. +// In this scheme, even though the 64-bit floating point registers D0-D31 +// overlap physically with the 32-bit floating pointer registers S0-S31, +// they are given a non-overlapping range of register numbers. +// +// Commented out ranges are not preserved during unwinding. +enum { + UNW_ARM_R0 = 0, + UNW_ARM_R1 = 1, + UNW_ARM_R2 = 2, + UNW_ARM_R3 = 3, + UNW_ARM_R4 = 4, + UNW_ARM_R5 = 5, + UNW_ARM_R6 = 6, + UNW_ARM_R7 = 7, + UNW_ARM_R8 = 8, + UNW_ARM_R9 = 9, + UNW_ARM_R10 = 10, + UNW_ARM_R11 = 11, + UNW_ARM_R12 = 12, + UNW_ARM_SP = 13, // Logical alias for UNW_REG_SP + UNW_ARM_R13 = 13, + UNW_ARM_LR = 14, + UNW_ARM_R14 = 14, + UNW_ARM_IP = 15, // Logical alias for UNW_REG_IP + UNW_ARM_R15 = 15, + // 16-63 -- OBSOLETE. Used in VFP1 to represent both S0-S31 and D0-D31. + UNW_ARM_S0 = 64, + UNW_ARM_S1 = 65, + UNW_ARM_S2 = 66, + UNW_ARM_S3 = 67, + UNW_ARM_S4 = 68, + UNW_ARM_S5 = 69, + UNW_ARM_S6 = 70, + UNW_ARM_S7 = 71, + UNW_ARM_S8 = 72, + UNW_ARM_S9 = 73, + UNW_ARM_S10 = 74, + UNW_ARM_S11 = 75, + UNW_ARM_S12 = 76, + UNW_ARM_S13 = 77, + UNW_ARM_S14 = 78, + UNW_ARM_S15 = 79, + UNW_ARM_S16 = 80, + UNW_ARM_S17 = 81, + UNW_ARM_S18 = 82, + UNW_ARM_S19 = 83, + UNW_ARM_S20 = 84, + UNW_ARM_S21 = 85, + UNW_ARM_S22 = 86, + UNW_ARM_S23 = 87, + UNW_ARM_S24 = 88, + UNW_ARM_S25 = 89, + UNW_ARM_S26 = 90, + UNW_ARM_S27 = 91, + UNW_ARM_S28 = 92, + UNW_ARM_S29 = 93, + UNW_ARM_S30 = 94, + UNW_ARM_S31 = 95, + // 96-103 -- OBSOLETE. F0-F7. Used by the FPA system. Superseded by VFP. 
+ // 104-111 -- wCGR0-wCGR7, ACC0-ACC7 (Intel wireless MMX) + UNW_ARM_WR0 = 112, + UNW_ARM_WR1 = 113, + UNW_ARM_WR2 = 114, + UNW_ARM_WR3 = 115, + UNW_ARM_WR4 = 116, + UNW_ARM_WR5 = 117, + UNW_ARM_WR6 = 118, + UNW_ARM_WR7 = 119, + UNW_ARM_WR8 = 120, + UNW_ARM_WR9 = 121, + UNW_ARM_WR10 = 122, + UNW_ARM_WR11 = 123, + UNW_ARM_WR12 = 124, + UNW_ARM_WR13 = 125, + UNW_ARM_WR14 = 126, + UNW_ARM_WR15 = 127, + // 128-133 -- SPSR, SPSR_{FIQ|IRQ|ABT|UND|SVC} + // 134-143 -- Reserved + // 144-150 -- R8_USR-R14_USR + // 151-157 -- R8_FIQ-R14_FIQ + // 158-159 -- R13_IRQ-R14_IRQ + // 160-161 -- R13_ABT-R14_ABT + // 162-163 -- R13_UND-R14_UND + // 164-165 -- R13_SVC-R14_SVC + // 166-191 -- Reserved + UNW_ARM_WC0 = 192, + UNW_ARM_WC1 = 193, + UNW_ARM_WC2 = 194, + UNW_ARM_WC3 = 195, + // 196-199 -- wC4-wC7 (Intel wireless MMX control) + // 200-255 -- Reserved + UNW_ARM_D0 = 256, + UNW_ARM_D1 = 257, + UNW_ARM_D2 = 258, + UNW_ARM_D3 = 259, + UNW_ARM_D4 = 260, + UNW_ARM_D5 = 261, + UNW_ARM_D6 = 262, + UNW_ARM_D7 = 263, + UNW_ARM_D8 = 264, + UNW_ARM_D9 = 265, + UNW_ARM_D10 = 266, + UNW_ARM_D11 = 267, + UNW_ARM_D12 = 268, + UNW_ARM_D13 = 269, + UNW_ARM_D14 = 270, + UNW_ARM_D15 = 271, + UNW_ARM_D16 = 272, + UNW_ARM_D17 = 273, + UNW_ARM_D18 = 274, + UNW_ARM_D19 = 275, + UNW_ARM_D20 = 276, + UNW_ARM_D21 = 277, + UNW_ARM_D22 = 278, + UNW_ARM_D23 = 279, + UNW_ARM_D24 = 280, + UNW_ARM_D25 = 281, + UNW_ARM_D26 = 282, + UNW_ARM_D27 = 283, + UNW_ARM_D28 = 284, + UNW_ARM_D29 = 285, + UNW_ARM_D30 = 286, + UNW_ARM_D31 = 287, + // 288-319 -- Reserved for VFP/Neon + // 320-8191 -- Reserved + // 8192-16383 -- Unspecified vendor co-processor register. +}; + +// OpenRISC1000 register numbers +enum { + UNW_OR1K_R0 = 0, + UNW_OR1K_R1 = 1, + UNW_OR1K_R2 = 2, + UNW_OR1K_R3 = 3, + UNW_OR1K_R4 = 4, + UNW_OR1K_R5 = 5, + UNW_OR1K_R6 = 6, + UNW_OR1K_R7 = 7, + UNW_OR1K_R8 = 8, + UNW_OR1K_R9 = 9, + UNW_OR1K_R10 = 10, + UNW_OR1K_R11 = 11, + UNW_OR1K_R12 = 12, + UNW_OR1K_R13 = 13, + UNW_OR1K_R14 = 14, + UNW_OR1K_R15 = 15, + UNW_OR1K_R16 = 16, + UNW_OR1K_R17 = 17, + UNW_OR1K_R18 = 18, + UNW_OR1K_R19 = 19, + UNW_OR1K_R20 = 20, + UNW_OR1K_R21 = 21, + UNW_OR1K_R22 = 22, + UNW_OR1K_R23 = 23, + UNW_OR1K_R24 = 24, + UNW_OR1K_R25 = 25, + UNW_OR1K_R26 = 26, + UNW_OR1K_R27 = 27, + UNW_OR1K_R28 = 28, + UNW_OR1K_R29 = 29, + UNW_OR1K_R30 = 30, + UNW_OR1K_R31 = 31, + UNW_OR1K_EPCR = 32, +}; + +// MIPS registers +enum { + UNW_MIPS_R0 = 0, + UNW_MIPS_R1 = 1, + UNW_MIPS_R2 = 2, + UNW_MIPS_R3 = 3, + UNW_MIPS_R4 = 4, + UNW_MIPS_R5 = 5, + UNW_MIPS_R6 = 6, + UNW_MIPS_R7 = 7, + UNW_MIPS_R8 = 8, + UNW_MIPS_R9 = 9, + UNW_MIPS_R10 = 10, + UNW_MIPS_R11 = 11, + UNW_MIPS_R12 = 12, + UNW_MIPS_R13 = 13, + UNW_MIPS_R14 = 14, + UNW_MIPS_R15 = 15, + UNW_MIPS_R16 = 16, + UNW_MIPS_R17 = 17, + UNW_MIPS_R18 = 18, + UNW_MIPS_R19 = 19, + UNW_MIPS_R20 = 20, + UNW_MIPS_R21 = 21, + UNW_MIPS_R22 = 22, + UNW_MIPS_R23 = 23, + UNW_MIPS_R24 = 24, + UNW_MIPS_R25 = 25, + UNW_MIPS_R26 = 26, + UNW_MIPS_R27 = 27, + UNW_MIPS_R28 = 28, + UNW_MIPS_R29 = 29, + UNW_MIPS_R30 = 30, + UNW_MIPS_R31 = 31, + UNW_MIPS_F0 = 32, + UNW_MIPS_F1 = 33, + UNW_MIPS_F2 = 34, + UNW_MIPS_F3 = 35, + UNW_MIPS_F4 = 36, + UNW_MIPS_F5 = 37, + UNW_MIPS_F6 = 38, + UNW_MIPS_F7 = 39, + UNW_MIPS_F8 = 40, + UNW_MIPS_F9 = 41, + UNW_MIPS_F10 = 42, + UNW_MIPS_F11 = 43, + UNW_MIPS_F12 = 44, + UNW_MIPS_F13 = 45, + UNW_MIPS_F14 = 46, + UNW_MIPS_F15 = 47, + UNW_MIPS_F16 = 48, + UNW_MIPS_F17 = 49, + UNW_MIPS_F18 = 50, + UNW_MIPS_F19 = 51, + UNW_MIPS_F20 = 52, + UNW_MIPS_F21 = 53, + UNW_MIPS_F22 = 54, + UNW_MIPS_F23 = 55, + 
UNW_MIPS_F24 = 56, + UNW_MIPS_F25 = 57, + UNW_MIPS_F26 = 58, + UNW_MIPS_F27 = 59, + UNW_MIPS_F28 = 60, + UNW_MIPS_F29 = 61, + UNW_MIPS_F30 = 62, + UNW_MIPS_F31 = 63, + UNW_MIPS_HI = 64, + UNW_MIPS_LO = 65, +}; + +// SPARC registers +enum { + UNW_SPARC_G0 = 0, + UNW_SPARC_G1 = 1, + UNW_SPARC_G2 = 2, + UNW_SPARC_G3 = 3, + UNW_SPARC_G4 = 4, + UNW_SPARC_G5 = 5, + UNW_SPARC_G6 = 6, + UNW_SPARC_G7 = 7, + UNW_SPARC_O0 = 8, + UNW_SPARC_O1 = 9, + UNW_SPARC_O2 = 10, + UNW_SPARC_O3 = 11, + UNW_SPARC_O4 = 12, + UNW_SPARC_O5 = 13, + UNW_SPARC_O6 = 14, + UNW_SPARC_O7 = 15, + UNW_SPARC_L0 = 16, + UNW_SPARC_L1 = 17, + UNW_SPARC_L2 = 18, + UNW_SPARC_L3 = 19, + UNW_SPARC_L4 = 20, + UNW_SPARC_L5 = 21, + UNW_SPARC_L6 = 22, + UNW_SPARC_L7 = 23, + UNW_SPARC_I0 = 24, + UNW_SPARC_I1 = 25, + UNW_SPARC_I2 = 26, + UNW_SPARC_I3 = 27, + UNW_SPARC_I4 = 28, + UNW_SPARC_I5 = 29, + UNW_SPARC_I6 = 30, + UNW_SPARC_I7 = 31, +}; + +#endif diff --git a/src/coreclr/src/nativeaot/libunwind/include/mach-o/compact_unwind_encoding.h b/src/coreclr/src/nativeaot/libunwind/include/mach-o/compact_unwind_encoding.h new file mode 100644 index 0000000000000..5301b1055ef93 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/include/mach-o/compact_unwind_encoding.h @@ -0,0 +1,477 @@ +//===------------------ mach-o/compact_unwind_encoding.h ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Darwin's alternative to DWARF based unwind encodings. +// +//===----------------------------------------------------------------------===// + + +#ifndef __COMPACT_UNWIND_ENCODING__ +#define __COMPACT_UNWIND_ENCODING__ + +#include + +// +// Compilers can emit standard DWARF FDEs in the __TEXT,__eh_frame section +// of object files. Or compilers can emit compact unwind information in +// the __LD,__compact_unwind section. +// +// When the linker creates a final linked image, it will create a +// __TEXT,__unwind_info section. This section is a small and fast way for the +// runtime to access unwind info for any given function. If the compiler +// emitted compact unwind info for the function, that compact unwind info will +// be encoded in the __TEXT,__unwind_info section. If the compiler emitted +// DWARF unwind info, the __TEXT,__unwind_info section will contain the offset +// of the FDE in the __TEXT,__eh_frame section in the final linked image. +// +// Note: Previously, the linker would transform some DWARF unwind infos into +// compact unwind info. But that is fragile and no longer done. + + +// +// The compact unwind endoding is a 32-bit value which encoded in an +// architecture specific way, which registers to restore from where, and how +// to unwind out of the function. 
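A brief aside before the architecture-specific bit layouts that follow: the snippet below is a minimal, self-contained sketch (not part of this patch) of how a consumer could split a compact unwind encoding into its architecture-independent pieces. The constant values are copied from the masks declared in this header so the sketch compiles on its own; decode_common_bits and the example encoding value are invented for illustration.

#include <cstdint>
#include <cstdio>

// Mirrors UNWIND_IS_NOT_FUNCTION_START / UNWIND_HAS_LSDA / UNWIND_PERSONALITY_MASK
// from this header, duplicated here so the sketch stands alone.
static const uint32_t kIsNotFunctionStart = 0x80000000;
static const uint32_t kHasLSDA            = 0x40000000;
static const uint32_t kPersonalityMask    = 0x30000000;

static void decode_common_bits(uint32_t encoding) {
    bool notFunctionStart = (encoding & kIsNotFunctionStart) != 0;
    bool hasLSDA          = (encoding & kHasLSDA) != 0;
    // The personality index occupies bits 28-29; 0 means no personality routine.
    unsigned personalityIndex = (encoding & kPersonalityMask) >> 28;
    std::printf("notFunctionStart=%d hasLSDA=%d personalityIndex=%u\n",
                (int)notFunctionStart, (int)hasLSDA, personalityIndex);
}

int main() {
    decode_common_bits(0x41000020); // hypothetical encoding: has LSDA, personality index 0
    return 0;
}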
+// +typedef uint32_t compact_unwind_encoding_t; + + +// architecture independent bits +enum { + UNWIND_IS_NOT_FUNCTION_START = 0x80000000, + UNWIND_HAS_LSDA = 0x40000000, + UNWIND_PERSONALITY_MASK = 0x30000000, +}; + + + + +// +// x86 +// +// 1-bit: start +// 1-bit: has lsda +// 2-bit: personality index +// +// 4-bits: 0=old, 1=ebp based, 2=stack-imm, 3=stack-ind, 4=DWARF +// ebp based: +// 15-bits (5*3-bits per reg) register permutation +// 8-bits for stack offset +// frameless: +// 8-bits stack size +// 3-bits stack adjust +// 3-bits register count +// 10-bits register permutation +// +enum { + UNWIND_X86_MODE_MASK = 0x0F000000, + UNWIND_X86_MODE_EBP_FRAME = 0x01000000, + UNWIND_X86_MODE_STACK_IMMD = 0x02000000, + UNWIND_X86_MODE_STACK_IND = 0x03000000, + UNWIND_X86_MODE_DWARF = 0x04000000, + + UNWIND_X86_EBP_FRAME_REGISTERS = 0x00007FFF, + UNWIND_X86_EBP_FRAME_OFFSET = 0x00FF0000, + + UNWIND_X86_FRAMELESS_STACK_SIZE = 0x00FF0000, + UNWIND_X86_FRAMELESS_STACK_ADJUST = 0x0000E000, + UNWIND_X86_FRAMELESS_STACK_REG_COUNT = 0x00001C00, + UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF, + + UNWIND_X86_DWARF_SECTION_OFFSET = 0x00FFFFFF, +}; + +enum { + UNWIND_X86_REG_NONE = 0, + UNWIND_X86_REG_EBX = 1, + UNWIND_X86_REG_ECX = 2, + UNWIND_X86_REG_EDX = 3, + UNWIND_X86_REG_EDI = 4, + UNWIND_X86_REG_ESI = 5, + UNWIND_X86_REG_EBP = 6, +}; + +// +// For x86 there are four modes for the compact unwind encoding: +// UNWIND_X86_MODE_EBP_FRAME: +// EBP based frame where EBP is push on stack immediately after return address, +// then ESP is moved to EBP. Thus, to unwind ESP is restored with the current +// EPB value, then EBP is restored by popping off the stack, and the return +// is done by popping the stack once more into the pc. +// All non-volatile registers that need to be restored must have been saved +// in a small range in the stack that starts EBP-4 to EBP-1020. The offset/4 +// is encoded in the UNWIND_X86_EBP_FRAME_OFFSET bits. The registers saved +// are encoded in the UNWIND_X86_EBP_FRAME_REGISTERS bits as five 3-bit entries. +// Each entry contains which register to restore. +// UNWIND_X86_MODE_STACK_IMMD: +// A "frameless" (EBP not used as frame pointer) function with a small +// constant stack size. To return, a constant (encoded in the compact +// unwind encoding) is added to the ESP. Then the return is done by +// popping the stack into the pc. +// All non-volatile registers that need to be restored must have been saved +// on the stack immediately after the return address. The stack_size/4 is +// encoded in the UNWIND_X86_FRAMELESS_STACK_SIZE (max stack size is 1024). +// The number of registers saved is encoded in UNWIND_X86_FRAMELESS_STACK_REG_COUNT. +// UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION constains which registers were +// saved and their order. +// UNWIND_X86_MODE_STACK_IND: +// A "frameless" (EBP not used as frame pointer) function large constant +// stack size. This case is like the previous, except the stack size is too +// large to encode in the compact unwind encoding. Instead it requires that +// the function contains "subl $nnnnnnnn,ESP" in its prolog. The compact +// encoding contains the offset to the nnnnnnnn value in the function in +// UNWIND_X86_FRAMELESS_STACK_SIZE. +// UNWIND_X86_MODE_DWARF: +// No compact unwind encoding is available. Instead the low 24-bits of the +// compact encoding is the offset of the DWARF FDE in the __eh_frame section. +// This mode is never used in object files. 
It is only generated by the +// linker in final linked images which have only DWARF unwind info for a +// function. +// +// The permutation encoding is a Lehmer code sequence encoded into a +// single variable-base number so we can encode the ordering of up to +// six registers in a 10-bit space. +// +// The following is the algorithm used to create the permutation encoding used +// with frameless stacks. It is passed the number of registers to be saved and +// an array of the register numbers saved. +// +//uint32_t permute_encode(uint32_t registerCount, const uint32_t registers[6]) +//{ +// uint32_t renumregs[6]; +// for (int i=6-registerCount; i < 6; ++i) { +// int countless = 0; +// for (int j=6-registerCount; j < i; ++j) { +// if ( registers[j] < registers[i] ) +// ++countless; +// } +// renumregs[i] = registers[i] - countless -1; +// } +// uint32_t permutationEncoding = 0; +// switch ( registerCount ) { +// case 6: +// permutationEncoding |= (120*renumregs[0] + 24*renumregs[1] +// + 6*renumregs[2] + 2*renumregs[3] +// + renumregs[4]); +// break; +// case 5: +// permutationEncoding |= (120*renumregs[1] + 24*renumregs[2] +// + 6*renumregs[3] + 2*renumregs[4] +// + renumregs[5]); +// break; +// case 4: +// permutationEncoding |= (60*renumregs[2] + 12*renumregs[3] +// + 3*renumregs[4] + renumregs[5]); +// break; +// case 3: +// permutationEncoding |= (20*renumregs[3] + 4*renumregs[4] +// + renumregs[5]); +// break; +// case 2: +// permutationEncoding |= (5*renumregs[4] + renumregs[5]); +// break; +// case 1: +// permutationEncoding |= (renumregs[5]); +// break; +// } +// return permutationEncoding; +//} +// + + + + +// +// x86_64 +// +// 1-bit: start +// 1-bit: has lsda +// 2-bit: personality index +// +// 4-bits: 0=old, 1=rbp based, 2=stack-imm, 3=stack-ind, 4=DWARF +// rbp based: +// 15-bits (5*3-bits per reg) register permutation +// 8-bits for stack offset +// frameless: +// 8-bits stack size +// 3-bits stack adjust +// 3-bits register count +// 10-bits register permutation +// +enum { + UNWIND_X86_64_MODE_MASK = 0x0F000000, + UNWIND_X86_64_MODE_RBP_FRAME = 0x01000000, + UNWIND_X86_64_MODE_STACK_IMMD = 0x02000000, + UNWIND_X86_64_MODE_STACK_IND = 0x03000000, + UNWIND_X86_64_MODE_DWARF = 0x04000000, + + UNWIND_X86_64_RBP_FRAME_REGISTERS = 0x00007FFF, + UNWIND_X86_64_RBP_FRAME_OFFSET = 0x00FF0000, + + UNWIND_X86_64_FRAMELESS_STACK_SIZE = 0x00FF0000, + UNWIND_X86_64_FRAMELESS_STACK_ADJUST = 0x0000E000, + UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT = 0x00001C00, + UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF, + + UNWIND_X86_64_DWARF_SECTION_OFFSET = 0x00FFFFFF, +}; + +enum { + UNWIND_X86_64_REG_NONE = 0, + UNWIND_X86_64_REG_RBX = 1, + UNWIND_X86_64_REG_R12 = 2, + UNWIND_X86_64_REG_R13 = 3, + UNWIND_X86_64_REG_R14 = 4, + UNWIND_X86_64_REG_R15 = 5, + UNWIND_X86_64_REG_RBP = 6, +}; +// +// For x86_64 there are four modes for the compact unwind encoding: +// UNWIND_X86_64_MODE_RBP_FRAME: +// RBP based frame where RBP is push on stack immediately after return address, +// then RSP is moved to RBP. Thus, to unwind RSP is restored with the current +// EPB value, then RBP is restored by popping off the stack, and the return +// is done by popping the stack once more into the pc. +// All non-volatile registers that need to be restored must have been saved +// in a small range in the stack that starts RBP-8 to RBP-2040. The offset/8 +// is encoded in the UNWIND_X86_64_RBP_FRAME_OFFSET bits. 
The registers saved +// are encoded in the UNWIND_X86_64_RBP_FRAME_REGISTERS bits as five 3-bit entries. +// Each entry contains which register to restore. +// UNWIND_X86_64_MODE_STACK_IMMD: +// A "frameless" (RBP not used as frame pointer) function with a small +// constant stack size. To return, a constant (encoded in the compact +// unwind encoding) is added to the RSP. Then the return is done by +// popping the stack into the pc. +// All non-volatile registers that need to be restored must have been saved +// on the stack immediately after the return address. The stack_size/8 is +// encoded in the UNWIND_X86_64_FRAMELESS_STACK_SIZE (max stack size is 2048). +// The number of registers saved is encoded in UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT. +// UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION constains which registers were +// saved and their order. +// UNWIND_X86_64_MODE_STACK_IND: +// A "frameless" (RBP not used as frame pointer) function large constant +// stack size. This case is like the previous, except the stack size is too +// large to encode in the compact unwind encoding. Instead it requires that +// the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact +// encoding contains the offset to the nnnnnnnn value in the function in +// UNWIND_X86_64_FRAMELESS_STACK_SIZE. +// UNWIND_X86_64_MODE_DWARF: +// No compact unwind encoding is available. Instead the low 24-bits of the +// compact encoding is the offset of the DWARF FDE in the __eh_frame section. +// This mode is never used in object files. It is only generated by the +// linker in final linked images which have only DWARF unwind info for a +// function. +// + + +// ARM64 +// +// 1-bit: start +// 1-bit: has lsda +// 2-bit: personality index +// +// 4-bits: 4=frame-based, 3=DWARF, 2=frameless +// frameless: +// 12-bits of stack size +// frame-based: +// 4-bits D reg pairs saved +// 5-bits X reg pairs saved +// DWARF: +// 24-bits offset of DWARF FDE in __eh_frame section +// +enum { + UNWIND_ARM64_MODE_MASK = 0x0F000000, + UNWIND_ARM64_MODE_FRAMELESS = 0x02000000, + UNWIND_ARM64_MODE_DWARF = 0x03000000, + UNWIND_ARM64_MODE_FRAME = 0x04000000, + + UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001, + UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002, + UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004, + UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008, + UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010, + UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100, + UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200, + UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400, + UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800, + + UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK = 0x00FFF000, + UNWIND_ARM64_DWARF_SECTION_OFFSET = 0x00FFFFFF, +}; +// For arm64 there are three modes for the compact unwind encoding: +// UNWIND_ARM64_MODE_FRAME: +// This is a standard arm64 prolog where FP/LR are immediately pushed on the +// stack, then SP is copied to FP. If there are any non-volatile registers +// saved, then are copied into the stack frame in pairs in a contiguous +// range right below the saved FP/LR pair. Any subset of the five X pairs +// and four D pairs can be saved, but the memory layout must be in register +// number order. +// UNWIND_ARM64_MODE_FRAMELESS: +// A "frameless" leaf function, where FP/LR are not saved. The return address +// remains in LR throughout the function. If any non-volatile registers +// are saved, they must be pushed onto the stack before any stack space is +// allocated for local variables. 
The stack sized (including any saved +// non-volatile registers) divided by 16 is encoded in the bits +// UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK. +// UNWIND_ARM64_MODE_DWARF: +// No compact unwind encoding is available. Instead the low 24-bits of the +// compact encoding is the offset of the DWARF FDE in the __eh_frame section. +// This mode is never used in object files. It is only generated by the +// linker in final linked images which have only DWARF unwind info for a +// function. +// + + + + + +//////////////////////////////////////////////////////////////////////////////// +// +// Relocatable Object Files: __LD,__compact_unwind +// +//////////////////////////////////////////////////////////////////////////////// + +// +// A compiler can generated compact unwind information for a function by adding +// a "row" to the __LD,__compact_unwind section. This section has the +// S_ATTR_DEBUG bit set, so the section will be ignored by older linkers. +// It is removed by the new linker, so never ends up in final executables. +// This section is a table, initially with one row per function (that needs +// unwind info). The table columns and some conceptual entries are: +// +// range-start pointer to start of function/range +// range-length +// compact-unwind-encoding 32-bit encoding +// personality-function or zero if no personality function +// lsda or zero if no LSDA data +// +// The length and encoding fields are 32-bits. The other are all pointer sized. +// +// In x86_64 assembly, these entry would look like: +// +// .section __LD,__compact_unwind,regular,debug +// +// #compact unwind for _foo +// .quad _foo +// .set L1,LfooEnd-_foo +// .long L1 +// .long 0x01010001 +// .quad 0 +// .quad 0 +// +// #compact unwind for _bar +// .quad _bar +// .set L2,LbarEnd-_bar +// .long L2 +// .long 0x01020011 +// .quad __gxx_personality +// .quad except_tab1 +// +// +// Notes: There is no need for any labels in the the __compact_unwind section. +// The use of the .set directive is to force the evaluation of the +// range-length at assembly time, instead of generating relocations. +// +// To support future compiler optimizations where which non-volatile registers +// are saved changes within a function (e.g. delay saving non-volatiles until +// necessary), there can by multiple lines in the __compact_unwind table for one +// function, each with a different (non-overlapping) range and each with +// different compact unwind encodings that correspond to the non-volatiles +// saved at that range of the function. +// +// If a particular function is so wacky that there is no compact unwind way +// to encode it, then the compiler can emit traditional DWARF unwind info. +// The runtime will use which ever is available. +// +// Runtime support for compact unwind encodings are only available on 10.6 +// and later. So, the compiler should not generate it when targeting pre-10.6. + + + + +//////////////////////////////////////////////////////////////////////////////// +// +// Final Linked Images: __TEXT,__unwind_info +// +//////////////////////////////////////////////////////////////////////////////// + +// +// The __TEXT,__unwind_info section is laid out for an efficient two level lookup. +// The header of the section contains a coarse index that maps function address +// to the page (4096 byte block) containing the unwind info for that function. 
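To make the two-level scheme a little more concrete, here is a hedged sketch (not part of the patch) of the lookup shape: the first level is an array of index entries sorted by functionOffset that gets binary-searched, and each 32-bit entry in a compressed second-level page packs a 24-bit function offset together with an 8-bit index into an encodings array. The struct and helper names below are invented stand-ins for the real structures declared just after this comment.

#include <algorithm>
#include <cstdint>
#include <vector>

// Stand-in for unwind_info_section_header_index_entry (only the fields the sketch needs).
struct IndexEntrySketch {
    uint32_t functionOffset;
    uint32_t secondLevelPagesSectionOffset;
};

// First level: entries are sorted by functionOffset, so the page covering funcOffset
// is the last entry whose functionOffset is <= funcOffset.
static const IndexEntrySketch *findFirstLevelEntry(
    const std::vector<IndexEntrySketch> &index, uint32_t funcOffset) {
    auto it = std::upper_bound(index.begin(), index.end(), funcOffset,
                               [](uint32_t off, const IndexEntrySketch &e) {
                                   return off < e.functionOffset;
                               });
    if (it == index.begin())
        return nullptr;           // address precedes the first indexed function
    return &*(it - 1);
}

// Second level (compressed page): low 24 bits are the function offset, high 8 bits
// index into the page's (or the common) encodings array.
static void decodeCompressedEntry(uint32_t entry,
                                  uint32_t &funcOffset, uint32_t &encodingIndex) {
    funcOffset    = entry & 0x00FFFFFF;
    encodingIndex = (entry >> 24) & 0xFF;
}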
+// + +#define UNWIND_SECTION_VERSION 1 +struct unwind_info_section_header +{ + uint32_t version; // UNWIND_SECTION_VERSION + uint32_t commonEncodingsArraySectionOffset; + uint32_t commonEncodingsArrayCount; + uint32_t personalityArraySectionOffset; + uint32_t personalityArrayCount; + uint32_t indexSectionOffset; + uint32_t indexCount; + // compact_unwind_encoding_t[] + // uint32_t personalities[] + // unwind_info_section_header_index_entry[] + // unwind_info_section_header_lsda_index_entry[] +}; + +struct unwind_info_section_header_index_entry +{ + uint32_t functionOffset; + uint32_t secondLevelPagesSectionOffset; // section offset to start of regular or compress page + uint32_t lsdaIndexArraySectionOffset; // section offset to start of lsda_index array for this range +}; + +struct unwind_info_section_header_lsda_index_entry +{ + uint32_t functionOffset; + uint32_t lsdaOffset; +}; + +// +// There are two kinds of second level index pages: regular and compressed. +// A compressed page can hold up to 1021 entries, but it cannot be used +// if too many different encoding types are used. The regular page holds +// 511 entries. +// + +struct unwind_info_regular_second_level_entry +{ + uint32_t functionOffset; + compact_unwind_encoding_t encoding; +}; + +#define UNWIND_SECOND_LEVEL_REGULAR 2 +struct unwind_info_regular_second_level_page_header +{ + uint32_t kind; // UNWIND_SECOND_LEVEL_REGULAR + uint16_t entryPageOffset; + uint16_t entryCount; + // entry array +}; + +#define UNWIND_SECOND_LEVEL_COMPRESSED 3 +struct unwind_info_compressed_second_level_page_header +{ + uint32_t kind; // UNWIND_SECOND_LEVEL_COMPRESSED + uint16_t entryPageOffset; + uint16_t entryCount; + uint16_t encodingsPageOffset; + uint16_t encodingsCount; + // 32-bit entry array + // encodings array +}; + +#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry) (entry & 0x00FFFFFF) +#define UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry) ((entry >> 24) & 0xFF) + + + +#endif + diff --git a/src/coreclr/src/nativeaot/libunwind/include/unwind.h b/src/coreclr/src/nativeaot/libunwind/include/unwind.h new file mode 100644 index 0000000000000..47d303c3f095a --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/include/unwind.h @@ -0,0 +1,400 @@ +//===------------------------------- unwind.h -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// C++ ABI Level 1 ABI documented at: +// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html +// +//===----------------------------------------------------------------------===// + +#ifndef __UNWIND_H__ +#define __UNWIND_H__ + +#include <__libunwind_config.h> + +#include +#include + +#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) && defined(_WIN32) +#include +#include +#endif + +#if defined(__APPLE__) +#define LIBUNWIND_UNAVAIL __attribute__ (( unavailable )) +#else +#define LIBUNWIND_UNAVAIL +#endif + +typedef enum { + _URC_NO_REASON = 0, + _URC_OK = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8, +#if defined(_LIBUNWIND_ARM_EHABI) + _URC_FAILURE = 9 +#endif +} _Unwind_Reason_Code; + +typedef enum { + _UA_SEARCH_PHASE = 1, + _UA_CLEANUP_PHASE = 2, + _UA_HANDLER_FRAME = 4, + _UA_FORCE_UNWIND = 8, + _UA_END_OF_STACK = 16 // gcc extension to C++ ABI +} _Unwind_Action; + +typedef struct _Unwind_Context _Unwind_Context; // opaque + +#if defined(_LIBUNWIND_ARM_EHABI) +typedef uint32_t _Unwind_State; + +static const _Unwind_State _US_VIRTUAL_UNWIND_FRAME = 0; +static const _Unwind_State _US_UNWIND_FRAME_STARTING = 1; +static const _Unwind_State _US_UNWIND_FRAME_RESUME = 2; +static const _Unwind_State _US_ACTION_MASK = 3; +/* Undocumented flag for force unwinding. */ +static const _Unwind_State _US_FORCE_UNWIND = 8; + +typedef uint32_t _Unwind_EHT_Header; + +struct _Unwind_Control_Block; +typedef struct _Unwind_Control_Block _Unwind_Control_Block; +typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */ + +struct _Unwind_Control_Block { + uint64_t exception_class; + void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block*); + + /* Unwinder cache, private fields for the unwinder's use */ + struct { + uint32_t reserved1; /* init reserved1 to 0, then don't touch */ + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t reserved5; + } unwinder_cache; + + /* Propagation barrier cache (valid after phase 1): */ + struct { + uint32_t sp; + uint32_t bitpattern[5]; + } barrier_cache; + + /* Cleanup cache (preserved over cleanup): */ + struct { + uint32_t bitpattern[4]; + } cleanup_cache; + + /* Pr cache (for pr's benefit): */ + struct { + uint32_t fnstart; /* function start address */ + _Unwind_EHT_Header* ehtp; /* pointer to EHT entry header word */ + uint32_t additional; + uint32_t reserved1; + } pr_cache; + + long long int :0; /* Enforce the 8-byte alignment */ +} __attribute__((__aligned__(8))); + +typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn) + (_Unwind_State state, + _Unwind_Exception* exceptionObject, + struct _Unwind_Context* context); + +typedef _Unwind_Reason_Code (*__personality_routine) + (_Unwind_State state, + _Unwind_Exception* exceptionObject, + struct _Unwind_Context* context); +#else +struct _Unwind_Context; // opaque +struct _Unwind_Exception; // forward declaration +typedef struct _Unwind_Exception _Unwind_Exception; + +struct _Unwind_Exception { + uint64_t exception_class; + void (*exception_cleanup)(_Unwind_Reason_Code reason, + _Unwind_Exception *exc); +#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) + uintptr_t private_[6]; +#else + uintptr_t private_1; // non-zero means forced unwind + uintptr_t private_2; // holds sp that phase1 found for phase2 to use 
+#endif +#if __SIZEOF_POINTER__ == 4 + // The implementation of _Unwind_Exception uses an attribute mode on the + // above fields which has the side effect of causing this whole struct to + // round up to 32 bytes in size (48 with SEH). To be more explicit, we add + // pad fields added for binary compatibility. + uint32_t reserved[3]; +#endif + // The Itanium ABI requires that _Unwind_Exception objects are "double-word + // aligned". GCC has interpreted this to mean "use the maximum useful + // alignment for the target"; so do we. +} __attribute__((__aligned__)); + +typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn) + (int version, + _Unwind_Action actions, + uint64_t exceptionClass, + _Unwind_Exception* exceptionObject, + struct _Unwind_Context* context, + void* stop_parameter ); + +typedef _Unwind_Reason_Code (*__personality_routine) + (int version, + _Unwind_Action actions, + uint64_t exceptionClass, + _Unwind_Exception* exceptionObject, + struct _Unwind_Context* context); +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// +// The following are the base functions documented by the C++ ABI +// +#ifdef __USING_SJLJ_EXCEPTIONS__ +extern _Unwind_Reason_Code + _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object); +extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object); +#else +extern _Unwind_Reason_Code + _Unwind_RaiseException(_Unwind_Exception *exception_object); +extern void _Unwind_Resume(_Unwind_Exception *exception_object); +#endif +extern void _Unwind_DeleteException(_Unwind_Exception *exception_object); + +#if defined(_LIBUNWIND_ARM_EHABI) +typedef enum { + _UVRSC_CORE = 0, /* integer register */ + _UVRSC_VFP = 1, /* vfp */ + _UVRSC_WMMXD = 3, /* Intel WMMX data register */ + _UVRSC_WMMXC = 4 /* Intel WMMX control register */ +} _Unwind_VRS_RegClass; + +typedef enum { + _UVRSD_UINT32 = 0, + _UVRSD_VFPX = 1, + _UVRSD_UINT64 = 3, + _UVRSD_FLOAT = 4, + _UVRSD_DOUBLE = 5 +} _Unwind_VRS_DataRepresentation; + +typedef enum { + _UVRSR_OK = 0, + _UVRSR_NOT_IMPLEMENTED = 1, + _UVRSR_FAILED = 2 +} _Unwind_VRS_Result; + +extern void _Unwind_Complete(_Unwind_Exception* exception_object); + +extern _Unwind_VRS_Result +_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep); + +extern _Unwind_VRS_Result +_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep, uint32_t *pos); + +extern _Unwind_VRS_Result +_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t discriminator, + _Unwind_VRS_DataRepresentation representation); +#endif + +#if !defined(_LIBUNWIND_ARM_EHABI) + +extern uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index); +extern void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t new_value, uintptr_t *pos); +extern uintptr_t _Unwind_GetIP(struct _Unwind_Context *context); +extern void _Unwind_SetIP(struct _Unwind_Context *, uintptr_t new_value); + +#else // defined(_LIBUNWIND_ARM_EHABI) + +#if defined(_LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE) +#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 extern +#else +#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 static __inline__ +#endif + +// These are de facto helper functions for ARM, which delegate the function +// calls to _Unwind_VRS_Get/Set(). These are not a part of ARM EHABI +// specification, thus these function MUST be inlined. 
Please don't replace +// these with the "extern" function declaration; otherwise, the program +// including this header won't be ABI compatible and will result in +// link error when we are linking the program with libgcc. + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index) { + uintptr_t value = 0; + _Unwind_VRS_Get(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value); + return value; +} + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t value,uintptr_t *pos) { + _Unwind_VRS_Set(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value, pos); +} + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { + // remove the thumb-bit before returning + return _Unwind_GetGR(context, 15) & (~(uintptr_t)0x1); +} + +_LIBUNWIND_EXPORT_UNWIND_LEVEL1 +void _Unwind_SetIP(struct _Unwind_Context *context, uintptr_t value) { + uintptr_t thumb_bit = _Unwind_GetGR(context, 15) & ((uintptr_t)0x1); + _Unwind_SetGR(context, 15, value | thumb_bit, NULL); +} +#endif // defined(_LIBUNWIND_ARM_EHABI) + +extern uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context); +extern uintptr_t + _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context); +#ifdef __USING_SJLJ_EXCEPTIONS__ +extern _Unwind_Reason_Code + _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter); +#else +extern _Unwind_Reason_Code + _Unwind_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter); +#endif + +#ifdef __USING_SJLJ_EXCEPTIONS__ +typedef struct _Unwind_FunctionContext *_Unwind_FunctionContext_t; +extern void _Unwind_SjLj_Register(_Unwind_FunctionContext_t fc); +extern void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t fc); +#endif + +// +// The following are semi-suppoted extensions to the C++ ABI +// + +// +// called by __cxa_rethrow(). +// +#ifdef __USING_SJLJ_EXCEPTIONS__ +extern _Unwind_Reason_Code + _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *exception_object); +#else +extern _Unwind_Reason_Code + _Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object); +#endif + +// _Unwind_Backtrace() is a gcc extension that walks the stack and calls the +// _Unwind_Trace_Fn once per frame until it reaches the bottom of the stack +// or the _Unwind_Trace_Fn function returns something other than _URC_NO_REASON. +typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, + void *); +extern _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); + +// _Unwind_GetCFA is a gcc extension that can be called from within a +// personality handler to get the CFA (stack pointer before call) of +// current frame. +extern uintptr_t _Unwind_GetCFA(struct _Unwind_Context *); + + +// _Unwind_GetIPInfo is a gcc extension that can be called from within a +// personality handler. Similar to _Unwind_GetIP() but also returns in +// *ipBefore a non-zero value if the instruction pointer is at or before the +// instruction causing the unwind. Normally, in a function call, the IP returned +// is the return address which is after the call instruction and may be past the +// end of the function containing the call instruction. +extern uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, + int *ipBefore); + + +// __register_frame() is used with dynamically generated code to register the +// FDE for a generated (JIT) code. 
The FDE must use pc-rel addressing to point +// to its function and optional LSDA. +// __register_frame() has existed in all versions of Mac OS X, but in 10.4 and +// 10.5 it was buggy and did not actually register the FDE with the unwinder. +// In 10.6 and later it does register properly. +extern void __register_frame(const void *fde); +extern void __deregister_frame(const void *fde); + +// _Unwind_Find_FDE() will locate the FDE if the pc is in some function that has +// an associated FDE. Note, Mac OS X 10.6 and later, introduces "compact unwind +// info" which the runtime uses in preference to DWARF unwind info. This +// function will only work if the target function has an FDE but no compact +// unwind info. +struct dwarf_eh_bases { + uintptr_t tbase; + uintptr_t dbase; + uintptr_t func; +}; +extern const void *_Unwind_Find_FDE(const void *pc, struct dwarf_eh_bases *); + + +// This function attempts to find the start (address of first instruction) of +// a function given an address inside the function. It only works if the +// function has an FDE (DWARF unwind info). +// This function is unimplemented on Mac OS X 10.6 and later. Instead, use +// _Unwind_Find_FDE() and look at the dwarf_eh_bases.func result. +extern void *_Unwind_FindEnclosingFunction(void *pc); + +// Mac OS X does not support text-rel and data-rel addressing so these functions +// are unimplemented +extern uintptr_t _Unwind_GetDataRelBase(struct _Unwind_Context *context) + LIBUNWIND_UNAVAIL; +extern uintptr_t _Unwind_GetTextRelBase(struct _Unwind_Context *context) + LIBUNWIND_UNAVAIL; + +// Mac OS X 10.4 and 10.5 had implementations of these functions in +// libgcc_s.dylib, but they never worked. +/// These functions are no longer available on Mac OS X. +extern void __register_frame_info_bases(const void *fde, void *ob, void *tb, + void *db) LIBUNWIND_UNAVAIL; +extern void __register_frame_info(const void *fde, void *ob) + LIBUNWIND_UNAVAIL; +extern void __register_frame_info_table_bases(const void *fde, void *ob, + void *tb, void *db) + LIBUNWIND_UNAVAIL; +extern void __register_frame_info_table(const void *fde, void *ob) + LIBUNWIND_UNAVAIL; +extern void __register_frame_table(const void *fde) + LIBUNWIND_UNAVAIL; +extern void *__deregister_frame_info(const void *fde) + LIBUNWIND_UNAVAIL; +extern void *__deregister_frame_info_bases(const void *fde) + LIBUNWIND_UNAVAIL; + +#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) +#ifndef _WIN32 +typedef struct _EXCEPTION_RECORD EXCEPTION_RECORD; +typedef struct _CONTEXT CONTEXT; +typedef struct _DISPATCHER_CONTEXT DISPATCHER_CONTEXT; +#elif !defined(__MINGW32__) && VER_PRODUCTBUILD < 8000 +typedef struct _DISPATCHER_CONTEXT DISPATCHER_CONTEXT; +#endif +// This is the common wrapper for GCC-style personality functions with SEH. +extern EXCEPTION_DISPOSITION _GCC_specific_handler(EXCEPTION_RECORD *exc, + void *frame, + CONTEXT *ctx, + DISPATCHER_CONTEXT *disp, + __personality_routine pers); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // __UNWIND_H__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/AddressSpace.hpp b/src/coreclr/src/nativeaot/libunwind/src/AddressSpace.hpp new file mode 100644 index 0000000000000..fb07c807db9e9 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/AddressSpace.hpp @@ -0,0 +1,615 @@ +//===------------------------- AddressSpace.hpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Abstracts accessing local vs remote address spaces. +// +//===----------------------------------------------------------------------===// + +#ifndef __ADDRESSSPACE_HPP__ +#define __ADDRESSSPACE_HPP__ + +#include +#include +#include +#include + +#ifndef _LIBUNWIND_USE_DLADDR + #if !defined(_LIBUNWIND_IS_BAREMETAL) && !defined(_WIN32) + #define _LIBUNWIND_USE_DLADDR 1 + #else + #define _LIBUNWIND_USE_DLADDR 0 + #endif +#endif + +#if _LIBUNWIND_USE_DLADDR +#include +#if defined(__unix__) && defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +#pragma comment(lib, "dl") +#endif +#endif + +#ifdef __APPLE__ +#include +namespace libunwind { + bool checkKeyMgrRegisteredFDEs(uintptr_t targetAddr, void *&fde); +} +#endif + +#include "libunwind.h" +#include "config.h" +#include "dwarf2.h" +#include "EHHeaderParser.hpp" +#include "Registers.hpp" + +#ifdef __APPLE__ + + struct dyld_unwind_sections + { + const struct mach_header* mh; + const void* dwarf_section; + uintptr_t dwarf_section_length; + const void* compact_unwind_section; + uintptr_t compact_unwind_section_length; + }; + #if (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) \ + && (__MAC_OS_X_VERSION_MIN_REQUIRED >= 1070)) \ + || defined(__IPHONE_OS_VERSION_MIN_REQUIRED) + // In 10.7.0 or later, libSystem.dylib implements this function. + extern "C" bool _dyld_find_unwind_sections(void *, dyld_unwind_sections *); + #else + // In 10.6.x and earlier, we need to implement this functionality. Note + // that this requires a newer version of libmacho (from cctools) than is + // present in libSystem on 10.6.x (for getsectiondata). + static inline bool _dyld_find_unwind_sections(void* addr, + dyld_unwind_sections* info) { + // Find mach-o image containing address. + Dl_info dlinfo; + if (!dladdr(addr, &dlinfo)) + return false; +#if __LP64__ + const struct mach_header_64 *mh = (const struct mach_header_64 *)dlinfo.dli_fbase; +#else + const struct mach_header *mh = (const struct mach_header *)dlinfo.dli_fbase; +#endif + + // Initialize the return struct + info->mh = (const struct mach_header *)mh; + info->dwarf_section = getsectiondata(mh, "__TEXT", "__eh_frame", &info->dwarf_section_length); + info->compact_unwind_section = getsectiondata(mh, "__TEXT", "__unwind_info", &info->compact_unwind_section_length); + + if (!info->dwarf_section) { + info->dwarf_section_length = 0; + } + + if (!info->compact_unwind_section) { + info->compact_unwind_section_length = 0; + } + + return true; + } + #endif + +#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) + +// When statically linked on bare-metal, the symbols for the EH table are looked +// up without going through the dynamic loader. + +// The following linker script may be used to produce the necessary sections and symbols. +// Unless the --eh-frame-hdr linker option is provided, the section is not generated +// and does not take space in the output file. +// +// .eh_frame : +// { +// __eh_frame_start = .; +// KEEP(*(.eh_frame)) +// __eh_frame_end = .; +// } +// +// .eh_frame_hdr : +// { +// KEEP(*(.eh_frame_hdr)) +// } +// +// __eh_frame_hdr_start = SIZEOF(.eh_frame_hdr) > 0 ? ADDR(.eh_frame_hdr) : 0; +// __eh_frame_hdr_end = SIZEOF(.eh_frame_hdr) > 0 ? . 
: 0; + +extern char __eh_frame_start; +extern char __eh_frame_end; + +#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) +extern char __eh_frame_hdr_start; +extern char __eh_frame_hdr_end; +#endif + +#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) + +// When statically linked on bare-metal, the symbols for the EH table are looked +// up without going through the dynamic loader. +extern char __exidx_start; +extern char __exidx_end; + +#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +// ELF-based systems may use dl_iterate_phdr() to access sections +// containing unwinding information. The ElfW() macro for pointer-size +// independent ELF header traversal is not provided by on some +// systems (e.g., FreeBSD). On these systems the data structures are +// just called Elf_XXX. Define ElfW() locally. +#ifndef _WIN32 +#include +#else +#include +#include +#endif +#if !defined(ElfW) +#define ElfW(type) Elf_##type +#endif + +#endif + +namespace libunwind { + +/// Used by findUnwindSections() to return info about needed sections. +struct UnwindInfoSections { +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) || defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) || \ + defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + // No dso_base for SEH or ARM EHABI. + uintptr_t dso_base; +#endif +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + uintptr_t dwarf_section; + uintptr_t dwarf_section_length; +#endif +#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) + uintptr_t dwarf_index_section; + uintptr_t dwarf_index_section_length; +#endif +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + uintptr_t compact_unwind_section; + uintptr_t compact_unwind_section_length; +#endif +#if defined(_LIBUNWIND_ARM_EHABI) + uintptr_t arm_section; + uintptr_t arm_section_length; +#endif +}; + + +/// LocalAddressSpace is used as a template parameter to UnwindCursor when +/// unwinding a thread in the same process. The wrappers compile away, +/// making local unwinds fast. 
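A hedged usage sketch ahead of the class itself (not part of this patch): the parsers hand every raw read to the address-space object instead of dereferencing pointers directly, which is what lets a different address space be substituted through the template parameter. The helper below assumes this header is included; readLengthPrefixedULEB and its namespace are invented names for illustration only.

#include <cstdint>

namespace libunwind_sketch {
// Read a 4-byte length followed by a ULEB128 payload through the
// LocalAddressSpace accessors declared below.
inline uint64_t readLengthPrefixedULEB(libunwind::LocalAddressSpace &as,
                                       libunwind::LocalAddressSpace::pint_t addr) {
    uint32_t length = as.get32(addr);                        // fixed-width read
    libunwind::LocalAddressSpace::pint_t p   = addr + 4;     // payload follows the length
    libunwind::LocalAddressSpace::pint_t end = p + length;
    return libunwind::LocalAddressSpace::getULEB128(p, end); // advances p past the value
}
} // namespace libunwind_sketch

// A caller would normally pass the process-wide instance:
//   uint64_t v = libunwind_sketch::readLengthPrefixedULEB(
//       libunwind::LocalAddressSpace::sThisAddressSpace, someAddress);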
+class _LIBUNWIND_HIDDEN LocalAddressSpace { +public: + typedef uintptr_t pint_t; + typedef intptr_t sint_t; + uint8_t get8(pint_t addr) { + uint8_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uint16_t get16(pint_t addr) { + uint16_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uint32_t get32(pint_t addr) { + uint32_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uint64_t get64(pint_t addr) { + uint64_t val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + double getDouble(pint_t addr) { + double val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + v128 getVector(pint_t addr) { + v128 val; + memcpy(&val, (void *)addr, sizeof(val)); + return val; + } + uintptr_t getP(pint_t addr); + uint64_t getRegister(pint_t addr); + static uint64_t getULEB128(pint_t &addr, pint_t end); + static int64_t getSLEB128(pint_t &addr, pint_t end); + + pint_t getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, + pint_t datarelBase = 0); + bool findFunctionName(pint_t addr, char *buf, size_t bufLen, + unw_word_t *offset); + bool findUnwindSections(pint_t targetAddr, UnwindInfoSections &info); + bool findOtherFDE(pint_t targetAddr, pint_t &fde); + + static LocalAddressSpace sThisAddressSpace; +}; + +inline uintptr_t LocalAddressSpace::getP(pint_t addr) { +#if __SIZEOF_POINTER__ == 8 + return get64(addr); +#else + return get32(addr); +#endif +} + +inline uint64_t LocalAddressSpace::getRegister(pint_t addr) { +#if __SIZEOF_POINTER__ == 8 || defined(__mips64) + return get64(addr); +#else + return get32(addr); +#endif +} + +/// Read a ULEB128 into a 64-bit word. +inline uint64_t LocalAddressSpace::getULEB128(pint_t &addr, pint_t end) { + const uint8_t *p = (uint8_t *)addr; + const uint8_t *pend = (uint8_t *)end; + uint64_t result = 0; + int bit = 0; + do { + uint64_t b; + + if (p == pend) + _LIBUNWIND_ABORT("truncated uleb128 expression"); + + b = *p & 0x7f; + + if (bit >= 64 || b << bit >> bit != b) { + _LIBUNWIND_ABORT("malformed uleb128 expression"); + } else { + result |= b << bit; + bit += 7; + } + } while (*p++ >= 0x80); + addr = (pint_t) p; + return result; +} + +/// Read a SLEB128 into a 64-bit word. 
+inline int64_t LocalAddressSpace::getSLEB128(pint_t &addr, pint_t end) { + const uint8_t *p = (uint8_t *)addr; + const uint8_t *pend = (uint8_t *)end; + int64_t result = 0; + int bit = 0; + uint8_t byte; + do { + if (p == pend) + _LIBUNWIND_ABORT("truncated sleb128 expression"); + byte = *p++; + result |= ((byte & 0x7f) << bit); + bit += 7; + } while (byte & 0x80); + // sign extend negative numbers + if ((byte & 0x40) != 0) + result |= (-1ULL) << bit; + addr = (pint_t) p; + return result; +} + +inline LocalAddressSpace::pint_t +LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, + pint_t datarelBase) { + pint_t startAddr = addr; + const uint8_t *p = (uint8_t *)addr; + pint_t result; + + // first get value + switch (encoding & 0x0F) { + case DW_EH_PE_ptr: + result = getP(addr); + p += sizeof(pint_t); + addr = (pint_t) p; + break; + case DW_EH_PE_uleb128: + result = (pint_t)getULEB128(addr, end); + break; + case DW_EH_PE_udata2: + result = get16(addr); + p += 2; + addr = (pint_t) p; + break; + case DW_EH_PE_udata4: + result = get32(addr); + p += 4; + addr = (pint_t) p; + break; + case DW_EH_PE_udata8: + result = (pint_t)get64(addr); + p += 8; + addr = (pint_t) p; + break; + case DW_EH_PE_sleb128: + result = (pint_t)getSLEB128(addr, end); + break; + case DW_EH_PE_sdata2: + // Sign extend from signed 16-bit value. + result = (pint_t)(int16_t)get16(addr); + p += 2; + addr = (pint_t) p; + break; + case DW_EH_PE_sdata4: + // Sign extend from signed 32-bit value. + result = (pint_t)(int32_t)get32(addr); + p += 4; + addr = (pint_t) p; + break; + case DW_EH_PE_sdata8: + result = (pint_t)get64(addr); + p += 8; + addr = (pint_t) p; + break; + default: + _LIBUNWIND_ABORT("unknown pointer encoding"); + } + + // then add relative offset + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + // do nothing + break; + case DW_EH_PE_pcrel: + result += startAddr; + break; + case DW_EH_PE_textrel: + _LIBUNWIND_ABORT("DW_EH_PE_textrel pointer encoding not supported"); + break; + case DW_EH_PE_datarel: + // DW_EH_PE_datarel is only valid in a few places, so the parameter has a + // default value of 0, and we abort in the event that someone calls this + // function with a datarelBase of 0 and DW_EH_PE_datarel encoding. 
+ if (datarelBase == 0) + _LIBUNWIND_ABORT("DW_EH_PE_datarel is invalid with a datarelBase of 0"); + result += datarelBase; + break; + case DW_EH_PE_funcrel: + _LIBUNWIND_ABORT("DW_EH_PE_funcrel pointer encoding not supported"); + break; + case DW_EH_PE_aligned: + _LIBUNWIND_ABORT("DW_EH_PE_aligned pointer encoding not supported"); + break; + default: + _LIBUNWIND_ABORT("unknown pointer encoding"); + break; + } + + if (encoding & DW_EH_PE_indirect) + result = getP(result); + + return result; +} + +inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, + UnwindInfoSections &info) { +#ifdef __APPLE__ + dyld_unwind_sections dyldInfo; + if (_dyld_find_unwind_sections((void *)targetAddr, &dyldInfo)) { + info.dso_base = (uintptr_t)dyldInfo.mh; + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + info.dwarf_section = (uintptr_t)dyldInfo.dwarf_section; + info.dwarf_section_length = dyldInfo.dwarf_section_length; + #endif + info.compact_unwind_section = (uintptr_t)dyldInfo.compact_unwind_section; + info.compact_unwind_section_length = dyldInfo.compact_unwind_section_length; + return true; + } +#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) + // Bare metal is statically linked, so no need to ask the dynamic loader + info.dwarf_section_length = (uintptr_t)(&__eh_frame_end - &__eh_frame_start); + info.dwarf_section = (uintptr_t)(&__eh_frame_start); + _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p", + (void *)info.dwarf_section, (void *)info.dwarf_section_length); +#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) + info.dwarf_index_section = (uintptr_t)(&__eh_frame_hdr_start); + info.dwarf_index_section_length = (uintptr_t)(&__eh_frame_hdr_end - &__eh_frame_hdr_start); + _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: index section %p length %p", + (void *)info.dwarf_index_section, (void *)info.dwarf_index_section_length); +#endif + if (info.dwarf_section_length) + return true; +#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) + // Bare metal is statically linked, so no need to ask the dynamic loader + info.arm_section = (uintptr_t)(&__exidx_start); + info.arm_section_length = (uintptr_t)(&__exidx_end - &__exidx_start); + _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p", + (void *)info.arm_section, (void *)info.arm_section_length); + if (info.arm_section && info.arm_section_length) + return true; +#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32) + HMODULE mods[1024]; + HANDLE process = GetCurrentProcess(); + DWORD needed; + + if (!EnumProcessModules(process, mods, sizeof(mods), &needed)) + return false; + + for (unsigned i = 0; i < (needed / sizeof(HMODULE)); i++) { + PIMAGE_DOS_HEADER pidh = (PIMAGE_DOS_HEADER)mods[i]; + PIMAGE_NT_HEADERS pinh = (PIMAGE_NT_HEADERS)((BYTE *)pidh + pidh->e_lfanew); + PIMAGE_FILE_HEADER pifh = (PIMAGE_FILE_HEADER)&pinh->FileHeader; + PIMAGE_SECTION_HEADER pish = IMAGE_FIRST_SECTION(pinh); + bool found_obj = false; + bool found_hdr = false; + + info.dso_base = (uintptr_t)mods[i]; + for (unsigned j = 0; j < pifh->NumberOfSections; j++, pish++) { + uintptr_t begin = pish->VirtualAddress + (uintptr_t)mods[i]; + uintptr_t end = begin + pish->Misc.VirtualSize; + if (!strncmp((const char *)pish->Name, ".text", + IMAGE_SIZEOF_SHORT_NAME)) { + if (targetAddr >= begin && targetAddr < end) + found_obj = true; + } else if (!strncmp((const char *)pish->Name, ".eh_frame", + IMAGE_SIZEOF_SHORT_NAME)) { + info.dwarf_section = begin; + info.dwarf_section_length = 
pish->Misc.VirtualSize; + found_hdr = true; + } + if (found_obj && found_hdr) + return true; + } + } + return false; +#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) + // Don't even bother, since Windows has functions that do all this stuff + // for us. + (void)targetAddr; + (void)info; + return true; +#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) && \ + (__ANDROID_API__ < 21) + int length = 0; + info.arm_section = + (uintptr_t)dl_unwind_find_exidx((_Unwind_Ptr)targetAddr, &length); + info.arm_section_length = (uintptr_t)length; + if (info.arm_section && info.arm_section_length) + return true; +#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + struct dl_iterate_cb_data { + LocalAddressSpace *addressSpace; + UnwindInfoSections *sects; + uintptr_t targetAddr; + }; + + dl_iterate_cb_data cb_data = {this, &info, targetAddr}; + int found = dl_iterate_phdr( + [](struct dl_phdr_info *pinfo, size_t, void *data) -> int { + auto cbdata = static_cast(data); + bool found_obj = false; + bool found_hdr = false; + + assert(cbdata); + assert(cbdata->sects); + + if (cbdata->targetAddr < pinfo->dlpi_addr) { + return false; + } + +#if !defined(Elf_Half) + typedef ElfW(Half) Elf_Half; +#endif +#if !defined(Elf_Phdr) + typedef ElfW(Phdr) Elf_Phdr; +#endif +#if !defined(Elf_Addr) && defined(__ANDROID__) + typedef ElfW(Addr) Elf_Addr; +#endif + + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + #if !defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) + #error "_LIBUNWIND_SUPPORT_DWARF_UNWIND requires _LIBUNWIND_SUPPORT_DWARF_INDEX on this platform." + #endif + size_t object_length; +#if defined(__ANDROID__) + Elf_Addr image_base = + pinfo->dlpi_phnum + ? reinterpret_cast(pinfo->dlpi_phdr) - + reinterpret_cast(pinfo->dlpi_phdr) + ->p_offset + : 0; +#endif + + for (Elf_Half i = 0; i < pinfo->dlpi_phnum; i++) { + const Elf_Phdr *phdr = &pinfo->dlpi_phdr[i]; + if (phdr->p_type == PT_LOAD) { + uintptr_t begin = pinfo->dlpi_addr + phdr->p_vaddr; +#if defined(__ANDROID__) + if (pinfo->dlpi_addr == 0 && phdr->p_vaddr < image_base) + begin = begin + image_base; +#endif + uintptr_t end = begin + phdr->p_memsz; + if (cbdata->targetAddr >= begin && cbdata->targetAddr < end) { + cbdata->sects->dso_base = begin; + object_length = phdr->p_memsz; + found_obj = true; + } + } else if (phdr->p_type == PT_GNU_EH_FRAME) { + EHHeaderParser::EHHeaderInfo hdrInfo; + uintptr_t eh_frame_hdr_start = pinfo->dlpi_addr + phdr->p_vaddr; +#if defined(__ANDROID__) + if (pinfo->dlpi_addr == 0 && phdr->p_vaddr < image_base) + eh_frame_hdr_start = eh_frame_hdr_start + image_base; +#endif + cbdata->sects->dwarf_index_section = eh_frame_hdr_start; + cbdata->sects->dwarf_index_section_length = phdr->p_memsz; + found_hdr = EHHeaderParser::decodeEHHdr( + *cbdata->addressSpace, eh_frame_hdr_start, phdr->p_memsz, + hdrInfo); + if (found_hdr) + cbdata->sects->dwarf_section = hdrInfo.eh_frame_ptr; + } + } + + if (found_obj && found_hdr) { + cbdata->sects->dwarf_section_length = object_length; + return true; + } else { + return false; + } + #else // defined(_LIBUNWIND_ARM_EHABI) + for (Elf_Half i = 0; i < pinfo->dlpi_phnum; i++) { + const Elf_Phdr *phdr = &pinfo->dlpi_phdr[i]; + if (phdr->p_type == PT_LOAD) { + uintptr_t begin = pinfo->dlpi_addr + phdr->p_vaddr; + uintptr_t end = begin + phdr->p_memsz; + if (cbdata->targetAddr >= begin && cbdata->targetAddr < end) + found_obj = true; + } else if (phdr->p_type == PT_ARM_EXIDX) { + uintptr_t exidx_start = pinfo->dlpi_addr + phdr->p_vaddr; + 
cbdata->sects->arm_section = exidx_start; + cbdata->sects->arm_section_length = phdr->p_memsz; + found_hdr = true; + } + } + return found_obj && found_hdr; + #endif + }, + &cb_data); + return static_cast(found); +#endif + + return false; +} + + +inline bool LocalAddressSpace::findOtherFDE(pint_t targetAddr, pint_t &fde) { +#ifdef __APPLE__ + return checkKeyMgrRegisteredFDEs(targetAddr, *((void**)&fde)); +#else + // TO DO: if OS has way to dynamically register FDEs, check that. + (void)targetAddr; + (void)fde; + return false; +#endif +} + +inline bool LocalAddressSpace::findFunctionName(pint_t addr, char *buf, + size_t bufLen, + unw_word_t *offset) { +#if _LIBUNWIND_USE_DLADDR + Dl_info dyldInfo; + if (dladdr((void *)addr, &dyldInfo)) { + if (dyldInfo.dli_sname != NULL) { + snprintf(buf, bufLen, "%s", dyldInfo.dli_sname); + *offset = (addr - (pint_t) dyldInfo.dli_saddr); + return true; + } + } +#else + (void)addr; + (void)buf; + (void)bufLen; + (void)offset; +#endif + return false; +} + +} // namespace libunwind + +#endif // __ADDRESSSPACE_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/CMakeLists.txt b/src/coreclr/src/nativeaot/libunwind/src/CMakeLists.txt new file mode 100644 index 0000000000000..572c82396bfa4 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/CMakeLists.txt @@ -0,0 +1,178 @@ +# Get sources + +set(LIBUNWIND_CXX_SOURCES + libunwind.cpp + Unwind-EHABI.cpp + Unwind-seh.cpp + ) +unwind_append_if(LIBUNWIND_CXX_SOURCES APPLE Unwind_AppleExtras.cpp) + +set(LIBUNWIND_C_SOURCES + UnwindLevel1.c + UnwindLevel1-gcc-ext.c + Unwind-sjlj.c + ) +set_source_files_properties(${LIBUNWIND_C_SOURCES} + PROPERTIES + COMPILE_FLAGS "-std=c99") + +set(LIBUNWIND_ASM_SOURCES + UnwindRegistersRestore.S + UnwindRegistersSave.S + ) +set_source_files_properties(${LIBUNWIND_ASM_SOURCES} + PROPERTIES + LANGUAGE C) + +set(LIBUNWIND_HEADERS + AddressSpace.hpp + assembly.h + CompactUnwinder.hpp + config.h + dwarf2.h + DwarfInstructions.hpp + DwarfParser.hpp + libunwind_ext.h + Registers.hpp + RWMutex.hpp + UnwindCursor.hpp + ../include/libunwind.h + ../include/unwind.h + ) + +unwind_append_if(LIBUNWIND_HEADERS APPLE + ../include/mach-o/compact_unwind_encoding.h + ) + +if (MSVC_IDE) + # Force them all into the headers dir on MSVC, otherwise they end up at + # project scope because they don't have extensions. + source_group("Header Files" FILES ${LIBUNWIND_HEADERS}) +endif() + +set(LIBUNWIND_SOURCES + ${LIBUNWIND_CXX_SOURCES} + ${LIBUNWIND_C_SOURCES} + ${LIBUNWIND_ASM_SOURCES}) + +# Generate library list. +set(libraries) +unwind_append_if(libraries LIBUNWIND_HAS_C_LIB c) +if (LIBUNWIND_USE_COMPILER_RT) + list(APPEND libraries "${LIBUNWIND_BUILTINS_LIBRARY}") +else() + unwind_append_if(libraries LIBUNWIND_HAS_GCC_S_LIB gcc_s) + unwind_append_if(libraries LIBUNWIND_HAS_GCC_LIB gcc) +endif() +unwind_append_if(libraries LIBUNWIND_HAS_DL_LIB dl) +if (LIBUNWIND_ENABLE_THREADS) + unwind_append_if(libraries LIBUNWIND_HAS_PTHREAD_LIB pthread) + unwind_append_if(LIBUNWIND_COMPILE_FLAGS LIBUNWIND_WEAK_PTHREAD_LIB -DLIBUNWIND_USE_WEAK_PTHREAD=1) +endif() + +# Setup flags. 
+unwind_append_if(LIBUNWIND_CXX_FLAGS LIBUNWIND_HAS_NO_RTTI_FLAG -fno-rtti) + +unwind_append_if(LIBUNWIND_LINK_FLAGS LIBUNWIND_HAS_NODEFAULTLIBS_FLAG -nodefaultlibs) + +# MINGW_LIBRARIES is defined in config-ix.cmake +unwind_append_if(libraries MINGW "${MINGW_LIBRARIES}") + +if (LIBUNWIND_HAS_NO_EXCEPTIONS_FLAG AND LIBUNWIND_HAS_FUNWIND_TABLES) + list(APPEND LIBUNWIND_COMPILE_FLAGS -fno-exceptions) + list(APPEND LIBUNWIND_COMPILE_FLAGS -funwind-tables) +elseif (LIBUNWIND_ENABLE_SHARED) + message(FATAL_ERROR + "Compiler doesn't support generation of unwind tables if exception " + "support is disabled. Building libunwind DSO with runtime dependency " + "on C++ ABI library is not supported.") +endif() + +if (APPLE) + list(APPEND LIBUNWIND_COMPILE_FLAGS "-U__STRICT_ANSI__") + list(APPEND LIBUNWIND_LINK_FLAGS + "-compatibility_version 1" + "-install_name /usr/lib/libunwind.1.dylib") + + if (CMAKE_OSX_DEPLOYMENT_TARGET STREQUAL "10.6") + list(APPEND LIBUNWIND_LINK_FLAGS + "-current_version ${LIBUNWIND_VERSION}" + "/usr/lib/libSystem.B.dylib") + endif () +endif () + +string(REPLACE ";" " " LIBUNWIND_COMPILE_FLAGS "${LIBUNWIND_COMPILE_FLAGS}") +string(REPLACE ";" " " LIBUNWIND_CXX_FLAGS "${LIBUNWIND_CXX_FLAGS}") +string(REPLACE ";" " " LIBUNWIND_C_FLAGS "${LIBUNWIND_C_FLAGS}") +string(REPLACE ";" " " LIBUNWIND_LINK_FLAGS "${LIBUNWIND_LINK_FLAGS}") + +set_property(SOURCE ${LIBUNWIND_CXX_SOURCES} + APPEND_STRING PROPERTY COMPILE_FLAGS " ${LIBUNWIND_CXX_FLAGS}") +set_property(SOURCE ${LIBUNWIND_C_SOURCES} + APPEND_STRING PROPERTY COMPILE_FLAGS " ${LIBUNWIND_C_FLAGS}") + +# Build the shared library. +if (LIBUNWIND_ENABLE_SHARED) + add_library(unwind_shared SHARED ${LIBUNWIND_SOURCES} ${LIBUNWIND_HEADERS}) + if(COMMAND llvm_setup_rpath) + llvm_setup_rpath(unwind_shared) + endif() + target_link_libraries(unwind_shared PRIVATE ${libraries}) + set_target_properties(unwind_shared + PROPERTIES + COMPILE_FLAGS "${LIBUNWIND_COMPILE_FLAGS}" + LINK_FLAGS "${LIBUNWIND_LINK_FLAGS}" + OUTPUT_NAME "unwind" + VERSION "1.0" + SOVERSION "1") + list(APPEND LIBUNWIND_BUILD_TARGETS "unwind_shared") + if (LIBUNWIND_INSTALL_SHARED_LIBRARY) + list(APPEND LIBUNWIND_INSTALL_TARGETS "unwind_shared") + endif() +endif() + +# Build the static library. +if (LIBUNWIND_ENABLE_STATIC) + add_library(unwind_static STATIC ${LIBUNWIND_SOURCES} ${LIBUNWIND_HEADERS}) + target_link_libraries(unwind_static PRIVATE ${libraries}) + set_target_properties(unwind_static + PROPERTIES + COMPILE_FLAGS "${LIBUNWIND_COMPILE_FLAGS}" + LINK_FLAGS "${LIBUNWIND_LINK_FLAGS}" + OUTPUT_NAME "unwind") + + if(LIBUNWIND_HERMETIC_STATIC_LIBRARY) + append_flags_if_supported(UNWIND_STATIC_LIBRARY_FLAGS -fvisibility=hidden) + append_flags_if_supported(UNWIND_STATIC_LIBRARY_FLAGS -fvisibility-global-new-delete-hidden) + target_compile_options(unwind_static PRIVATE ${UNWIND_STATIC_LIBRARY_FLAGS}) + target_compile_definitions(unwind_static PRIVATE _LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS) + endif() + + list(APPEND LIBUNWIND_BUILD_TARGETS "unwind_static") + if (LIBUNWIND_INSTALL_STATIC_LIBRARY) + list(APPEND LIBUNWIND_INSTALL_TARGETS "unwind_static") + endif() +endif() + +# Add a meta-target for both libraries. 
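+# `unwind` depends on whichever of unwind_shared/unwind_static were enabled
+# above, e.g. (assuming an already configured build tree):
+#   cmake --build . --target unwind
+#   cmake --build . --target install-unwind   # only when library install is enabled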
+add_custom_target(unwind DEPENDS ${LIBUNWIND_BUILD_TARGETS}) + +if (LIBUNWIND_INSTALL_LIBRARY) + install(TARGETS ${LIBUNWIND_INSTALL_TARGETS} + LIBRARY DESTINATION ${LIBUNWIND_INSTALL_PREFIX}${LIBUNWIND_INSTALL_LIBRARY_DIR} COMPONENT unwind + ARCHIVE DESTINATION ${LIBUNWIND_INSTALL_PREFIX}${LIBUNWIND_INSTALL_LIBRARY_DIR} COMPONENT unwind) +endif() + +if (NOT CMAKE_CONFIGURATION_TYPES AND LIBUNWIND_INSTALL_LIBRARY) + add_custom_target(install-unwind + DEPENDS unwind + COMMAND "${CMAKE_COMMAND}" + -DCMAKE_INSTALL_COMPONENT=unwind + -P "${LIBUNWIND_BINARY_DIR}/cmake_install.cmake") + add_custom_target(install-unwind-stripped + DEPENDS unwind + COMMAND "${CMAKE_COMMAND}" + -DCMAKE_INSTALL_COMPONENT=unwind + -DCMAKE_INSTALL_DO_STRIP=1 + -P "${LIBUNWIND_BINARY_DIR}/cmake_install.cmake") +endif() diff --git a/src/coreclr/src/nativeaot/libunwind/src/CompactUnwinder.hpp b/src/coreclr/src/nativeaot/libunwind/src/CompactUnwinder.hpp new file mode 100644 index 0000000000000..3546f195120a1 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/CompactUnwinder.hpp @@ -0,0 +1,697 @@ +//===-------------------------- CompactUnwinder.hpp -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Does runtime stack unwinding using compact unwind encodings. +// +//===----------------------------------------------------------------------===// + +#ifndef __COMPACT_UNWINDER_HPP__ +#define __COMPACT_UNWINDER_HPP__ + +#include +#include + +#include +#include + +#include "Registers.hpp" + +#define EXTRACT_BITS(value, mask) \ + ((value >> __builtin_ctz(mask)) & (((1 << __builtin_popcount(mask))) - 1)) + +namespace libunwind { + +#if defined(_LIBUNWIND_TARGET_I386) +/// CompactUnwinder_x86 uses a compact unwind info to virtually "step" (aka +/// unwind) by modifying a Registers_x86 register set +template +class CompactUnwinder_x86 { +public: + + static int stepWithCompactEncoding(compact_unwind_encoding_t info, + uint32_t functionStart, A &addressSpace, + Registers_x86 ®isters); + +private: + typename A::pint_t pint_t; + + static void frameUnwind(A &addressSpace, Registers_x86 ®isters); + static void framelessUnwind(A &addressSpace, + typename A::pint_t returnAddressLocation, + Registers_x86 ®isters); + static int + stepWithCompactEncodingEBPFrame(compact_unwind_encoding_t compactEncoding, + uint32_t functionStart, A &addressSpace, + Registers_x86 ®isters); + static int stepWithCompactEncodingFrameless( + compact_unwind_encoding_t compactEncoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters, bool indirectStackSize); +}; + +template +int CompactUnwinder_x86::stepWithCompactEncoding( + compact_unwind_encoding_t compactEncoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters) { + switch (compactEncoding & UNWIND_X86_MODE_MASK) { + case UNWIND_X86_MODE_EBP_FRAME: + return stepWithCompactEncodingEBPFrame(compactEncoding, functionStart, + addressSpace, registers); + case UNWIND_X86_MODE_STACK_IMMD: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, false); + case UNWIND_X86_MODE_STACK_IND: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, true); + } + _LIBUNWIND_ABORT("invalid compact unwind encoding"); +} + +template +int CompactUnwinder_x86::stepWithCompactEncodingEBPFrame( + 
compact_unwind_encoding_t compactEncoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters) { + uint32_t savedRegistersOffset = + EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_OFFSET); + uint32_t savedRegistersLocations = + EXTRACT_BITS(compactEncoding, UNWIND_X86_EBP_FRAME_REGISTERS); + + uint32_t savedRegisters = registers.getEBP() - 4 * savedRegistersOffset; + for (int i = 0; i < 5; ++i) { + switch (savedRegistersLocations & 0x7) { + case UNWIND_X86_REG_NONE: + // no register saved in this slot + break; + case UNWIND_X86_REG_EBX: + registers.setEBX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_ECX: + registers.setECX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_EDX: + registers.setEDX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_EDI: + registers.setEDI(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_ESI: + registers.setESI(addressSpace.get32(savedRegisters), savedRegisters); + break; + default: + (void)functionStart; + _LIBUNWIND_DEBUG_LOG("bad register for EBP frame, encoding=%08X for " + "function starting at 0x%X", + compactEncoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 4; + savedRegistersLocations = (savedRegistersLocations >> 3); + } + frameUnwind(addressSpace, registers); + return UNW_STEP_SUCCESS; +} + +template +int CompactUnwinder_x86::stepWithCompactEncodingFrameless( + compact_unwind_encoding_t encoding, uint32_t functionStart, + A &addressSpace, Registers_x86 ®isters, bool indirectStackSize) { + uint32_t stackSizeEncoded = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE); + uint32_t stackAdjust = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST); + uint32_t regCount = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT); + uint32_t permutation = + EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION); + uint32_t stackSize = stackSizeEncoded * 4; + if (indirectStackSize) { + // stack size is encoded in subl $xxx,%esp instruction + uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded); + stackSize = subl + 4 * stackAdjust; + } + // decompress permutation + uint32_t permunreg[6]; + switch (regCount) { + case 6: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + permunreg[5] = 0; + break; + case 5: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + break; + case 4: + permunreg[0] = permutation / 60; + permutation -= (permunreg[0] * 60); + permunreg[1] = permutation / 12; + permutation -= (permunreg[1] * 12); + permunreg[2] = permutation / 3; + permutation -= (permunreg[2] * 3); + permunreg[3] = permutation; + break; + case 3: + permunreg[0] = permutation / 20; + permutation -= (permunreg[0] * 20); + permunreg[1] = permutation / 4; + permutation -= (permunreg[1] * 4); + permunreg[2] = permutation; + break; + case 2: + permunreg[0] = permutation / 5; + permutation 
-= (permunreg[0] * 5); + permunreg[1] = permutation; + break; + case 1: + permunreg[0] = permutation; + break; + } + // re-number registers back to standard numbers + int registersSaved[6]; + bool used[7] = { false, false, false, false, false, false, false }; + for (uint32_t i = 0; i < regCount; ++i) { + uint32_t renum = 0; + for (int u = 1; u < 7; ++u) { + if (!used[u]) { + if (renum == permunreg[i]) { + registersSaved[i] = u; + used[u] = true; + break; + } + ++renum; + } + } + } + uint32_t savedRegisters = registers.getSP() + stackSize - 4 - 4 * regCount; + for (uint32_t i = 0; i < regCount; ++i) { + switch (registersSaved[i]) { + case UNWIND_X86_REG_EBX: + registers.setEBX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_ECX: + registers.setECX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_EDX: + registers.setEDX(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_EDI: + registers.setEDI(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_ESI: + registers.setESI(addressSpace.get32(savedRegisters), savedRegisters); + break; + case UNWIND_X86_REG_EBP: + registers.setEBP(addressSpace.get32(savedRegisters), savedRegisters); + break; + default: + _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for " + "function starting at 0x%X", + encoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 4; + } + framelessUnwind(addressSpace, savedRegisters, registers); + return UNW_STEP_SUCCESS; +} + + +template +void CompactUnwinder_x86::frameUnwind(A &addressSpace, + Registers_x86 ®isters) { + typename A::pint_t bp = registers.getEBP(); + // ebp points to old ebp + registers.setEBP(addressSpace.get32(bp), bp); + // old esp is ebp less saved ebp and return address + registers.setSP((uint32_t)bp + 8, 0); + // pop return address into eip + registers.setIP(addressSpace.get32(bp + 4), bp + 4); +} + +template +void CompactUnwinder_x86::framelessUnwind( + A &addressSpace, typename A::pint_t returnAddressLocation, + Registers_x86 ®isters) { + // return address is on stack after last saved register + registers.setIP(addressSpace.get32(returnAddressLocation), returnAddressLocation); + // old esp is before return address + registers.setSP((uint32_t)returnAddressLocation + 4, 0); +} +#endif // _LIBUNWIND_TARGET_I386 + + +#if defined(_LIBUNWIND_TARGET_X86_64) +/// CompactUnwinder_x86_64 uses a compact unwind info to virtually "step" (aka +/// unwind) by modifying a Registers_x86_64 register set +template +class CompactUnwinder_x86_64 { +public: + + static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_x86_64 ®isters); + +private: + typename A::pint_t pint_t; + + static void frameUnwind(A &addressSpace, Registers_x86_64 ®isters); + static void framelessUnwind(A &addressSpace, uint64_t returnAddressLocation, + Registers_x86_64 ®isters); + static int + stepWithCompactEncodingRBPFrame(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_x86_64 ®isters); + static int stepWithCompactEncodingFrameless( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_x86_64 ®isters, bool indirectStackSize); +}; + +template +int CompactUnwinder_x86_64::stepWithCompactEncoding( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, 
Registers_x86_64 ®isters) { + switch (compactEncoding & UNWIND_X86_64_MODE_MASK) { + case UNWIND_X86_64_MODE_RBP_FRAME: + return stepWithCompactEncodingRBPFrame(compactEncoding, functionStart, + addressSpace, registers); + case UNWIND_X86_64_MODE_STACK_IMMD: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, false); + case UNWIND_X86_64_MODE_STACK_IND: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers, true); + } + _LIBUNWIND_ABORT("invalid compact unwind encoding"); +} + +template +int CompactUnwinder_x86_64::stepWithCompactEncodingRBPFrame( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_x86_64 ®isters) { + uint32_t savedRegistersOffset = + EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_OFFSET); + uint32_t savedRegistersLocations = + EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_REGISTERS); + + uint64_t savedRegisters = registers.getRBP() - 8 * savedRegistersOffset; + for (int i = 0; i < 5; ++i) { + switch (savedRegistersLocations & 0x7) { + case UNWIND_X86_64_REG_NONE: + // no register saved in this slot + break; + case UNWIND_X86_64_REG_RBX: + registers.setRBX(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R12: + registers.setR12(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R13: + registers.setR13(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R14: + registers.setR14(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R15: + registers.setR15(addressSpace.get64(savedRegisters), savedRegisters); + break; + default: + (void)functionStart; + _LIBUNWIND_DEBUG_LOG("bad register for RBP frame, encoding=%08X for " + "function starting at 0x%llX", + compactEncoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 8; + savedRegistersLocations = (savedRegistersLocations >> 3); + } + frameUnwind(addressSpace, registers); + return UNW_STEP_SUCCESS; +} + +template +int CompactUnwinder_x86_64::stepWithCompactEncodingFrameless( + compact_unwind_encoding_t encoding, uint64_t functionStart, A &addressSpace, + Registers_x86_64 ®isters, bool indirectStackSize) { + uint32_t stackSizeEncoded = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE); + uint32_t stackAdjust = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_ADJUST); + uint32_t regCount = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT); + uint32_t permutation = + EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION); + uint32_t stackSize = stackSizeEncoded * 8; + if (indirectStackSize) { + // stack size is encoded in subl $xxx,%esp instruction + uint32_t subl = addressSpace.get32(functionStart + stackSizeEncoded); + stackSize = subl + 8 * stackAdjust; + } + // decompress permutation + uint32_t permunreg[6]; + switch (regCount) { + case 6: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + permunreg[5] = 0; + break; + case 5: + permunreg[0] = permutation / 120; + permutation -= (permunreg[0] * 120); + permunreg[1] = permutation / 24; + permutation -= (permunreg[1] * 24); + 
permunreg[2] = permutation / 6; + permutation -= (permunreg[2] * 6); + permunreg[3] = permutation / 2; + permutation -= (permunreg[3] * 2); + permunreg[4] = permutation; + break; + case 4: + permunreg[0] = permutation / 60; + permutation -= (permunreg[0] * 60); + permunreg[1] = permutation / 12; + permutation -= (permunreg[1] * 12); + permunreg[2] = permutation / 3; + permutation -= (permunreg[2] * 3); + permunreg[3] = permutation; + break; + case 3: + permunreg[0] = permutation / 20; + permutation -= (permunreg[0] * 20); + permunreg[1] = permutation / 4; + permutation -= (permunreg[1] * 4); + permunreg[2] = permutation; + break; + case 2: + permunreg[0] = permutation / 5; + permutation -= (permunreg[0] * 5); + permunreg[1] = permutation; + break; + case 1: + permunreg[0] = permutation; + break; + } + // re-number registers back to standard numbers + int registersSaved[6]; + bool used[7] = { false, false, false, false, false, false, false }; + for (uint32_t i = 0; i < regCount; ++i) { + uint32_t renum = 0; + for (int u = 1; u < 7; ++u) { + if (!used[u]) { + if (renum == permunreg[i]) { + registersSaved[i] = u; + used[u] = true; + break; + } + ++renum; + } + } + } + uint64_t savedRegisters = registers.getSP() + stackSize - 8 - 8 * regCount; + for (uint32_t i = 0; i < regCount; ++i) { + switch (registersSaved[i]) { + case UNWIND_X86_64_REG_RBX: + registers.setRBX(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R12: + registers.setR12(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R13: + registers.setR13(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R14: + registers.setR14(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_R15: + registers.setR15(addressSpace.get64(savedRegisters), savedRegisters); + break; + case UNWIND_X86_64_REG_RBP: + registers.setRBP(addressSpace.get64(savedRegisters), savedRegisters); + break; + default: + _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X for " + "function starting at 0x%llX", + encoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + savedRegisters += 8; + } + framelessUnwind(addressSpace, savedRegisters, registers); + return UNW_STEP_SUCCESS; +} + + +template +void CompactUnwinder_x86_64::frameUnwind(A &addressSpace, + Registers_x86_64 ®isters) { + uint64_t rbp = registers.getRBP(); + // ebp points to old ebp + registers.setRBP(addressSpace.get64(rbp), rbp); + // old esp is ebp less saved ebp and return address + registers.setSP(rbp + 16, 0); + // pop return address into eip + registers.setIP(addressSpace.get64(rbp + 8), rbp + 8); +} + +template +void CompactUnwinder_x86_64::framelessUnwind(A &addressSpace, + uint64_t returnAddressLocation, + Registers_x86_64 ®isters) { + // return address is on stack after last saved register + registers.setIP(addressSpace.get64(returnAddressLocation), returnAddressLocation); + // old esp is before return address + registers.setSP(returnAddressLocation + 8, 0); +} +#endif // _LIBUNWIND_TARGET_X86_64 + + + +#if defined(_LIBUNWIND_TARGET_AARCH64) +/// CompactUnwinder_arm64 uses a compact unwind info to virtually "step" (aka +/// unwind) by modifying a Registers_arm64 register set +template +class CompactUnwinder_arm64 { +public: + + static int stepWithCompactEncoding(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_arm64 ®isters); + +private: + typename A::pint_t pint_t; + + 
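+  // One helper per arm64 compact-unwind mode: UNWIND_ARM64_MODE_FRAME
+  // (fp/lr frame with saved-register pairs below fp) and
+  // UNWIND_ARM64_MODE_FRAMELESS (fixed stack size encoded in the bits).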
static int + stepWithCompactEncodingFrame(compact_unwind_encoding_t compactEncoding, + uint64_t functionStart, A &addressSpace, + Registers_arm64 ®isters); + static int stepWithCompactEncodingFrameless( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_arm64 ®isters); +}; + +template +int CompactUnwinder_arm64::stepWithCompactEncoding( + compact_unwind_encoding_t compactEncoding, uint64_t functionStart, + A &addressSpace, Registers_arm64 ®isters) { + switch (compactEncoding & UNWIND_ARM64_MODE_MASK) { + case UNWIND_ARM64_MODE_FRAME: + return stepWithCompactEncodingFrame(compactEncoding, functionStart, + addressSpace, registers); + case UNWIND_ARM64_MODE_FRAMELESS: + return stepWithCompactEncodingFrameless(compactEncoding, functionStart, + addressSpace, registers); + } + _LIBUNWIND_ABORT("invalid compact unwind encoding"); +} + +template +int CompactUnwinder_arm64::stepWithCompactEncodingFrameless( + compact_unwind_encoding_t encoding, uint64_t, A &addressSpace, + Registers_arm64 ®isters) { + uint32_t stackSize = + 16 * EXTRACT_BITS(encoding, UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK); + + uint64_t savedRegisterLoc = registers.getSP() + stackSize; + + if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) { + registers.setRegister(UNW_ARM64_X19, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X20, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) { + registers.setRegister(UNW_ARM64_X21, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X22, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) { + registers.setRegister(UNW_ARM64_X23, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X24, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) { + registers.setRegister(UNW_ARM64_X25, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X26, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) { + registers.setRegister(UNW_ARM64_X27, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X28, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + + if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) { + registers.setFloatRegister(UNW_ARM64_D8, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D9, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) { + registers.setFloatRegister(UNW_ARM64_D10, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D11, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) { + registers.setFloatRegister(UNW_ARM64_D12, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + 
registers.setFloatRegister(UNW_ARM64_D13, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) { + registers.setFloatRegister(UNW_ARM64_D14, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D15, + addressSpace.getDouble(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + + // subtract stack size off of sp + registers.setSP(savedRegisterLoc, 0); + + // set pc to be value in lr + registers.setIP(registers.getRegister(UNW_ARM64_LR), 0); + + return UNW_STEP_SUCCESS; +} + +template +int CompactUnwinder_arm64::stepWithCompactEncodingFrame( + compact_unwind_encoding_t encoding, uint64_t, A &addressSpace, + Registers_arm64 ®isters) { + uint64_t savedRegisterLoc = registers.getFP() - 8; + + if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) { + registers.setRegister(UNW_ARM64_X19, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X20, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) { + registers.setRegister(UNW_ARM64_X21, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X22, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) { + registers.setRegister(UNW_ARM64_X23, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X24, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) { + registers.setRegister(UNW_ARM64_X25, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X26, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) { + registers.setRegister(UNW_ARM64_X27, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + registers.setRegister(UNW_ARM64_X28, addressSpace.get64(savedRegisterLoc), savedRegisterLoc); + savedRegisterLoc -= 8; + } + + if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) { + registers.setFloatRegister(UNW_ARM64_D8, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D9, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) { + registers.setFloatRegister(UNW_ARM64_D10, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D11, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) { + registers.setFloatRegister(UNW_ARM64_D12, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D13, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) { + registers.setFloatRegister(UNW_ARM64_D14, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + registers.setFloatRegister(UNW_ARM64_D15, + addressSpace.getDouble(savedRegisterLoc)); + savedRegisterLoc -= 8; + } + + uint64_t fp = registers.getFP(); + // fp points to 
old fp + registers.setFP(addressSpace.get64(fp), fp); + // old sp is fp less saved fp and lr + registers.setSP(fp + 16, 0); + // pop return address into pc + registers.setIP(addressSpace.get64(fp + 8), fp + 8); + + return UNW_STEP_SUCCESS; +} +#endif // _LIBUNWIND_TARGET_AARCH64 + + +} // namespace libunwind + +#endif // __COMPACT_UNWINDER_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/DwarfInstructions.hpp b/src/coreclr/src/nativeaot/libunwind/src/DwarfInstructions.hpp new file mode 100644 index 0000000000000..c5cc6c9d5107e --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/DwarfInstructions.hpp @@ -0,0 +1,831 @@ +//===-------------------------- DwarfInstructions.hpp ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Processor specific interpretation of DWARF unwind info. +// +//===----------------------------------------------------------------------===// + +#ifndef __DWARF_INSTRUCTIONS_HPP__ +#define __DWARF_INSTRUCTIONS_HPP__ + +#include +#include +#include + +#include "dwarf2.h" +#include "Registers.hpp" +#include "DwarfParser.hpp" +#include "config.h" + + +namespace libunwind { + + +/// DwarfInstructions maps abtract DWARF unwind instructions to a particular +/// architecture +template +class DwarfInstructions { +public: + typedef typename A::pint_t pint_t; + typedef typename A::sint_t sint_t; + + static int stepWithDwarf(A &addressSpace, pint_t pc, pint_t fdeStart, + R ®isters); + +private: + + enum { + DW_X86_64_RET_ADDR = 16 + }; + + enum { + DW_X86_RET_ADDR = 8 + }; + + typedef typename CFI_Parser::RegisterLocation RegisterLocation; + typedef typename CFI_Parser::PrologInfo PrologInfo; + typedef typename CFI_Parser::FDE_Info FDE_Info; + typedef typename CFI_Parser::CIE_Info CIE_Info; + + static pint_t evaluateExpression(pint_t expression, A &addressSpace, + const R ®isters, + pint_t initialStackValue); + static pint_t getSavedRegister(A &addressSpace, const R ®isters, + pint_t cfa, const RegisterLocation &savedReg, + pint_t& location); + static double getSavedFloatRegister(A &addressSpace, const R ®isters, + pint_t cfa, const RegisterLocation &savedReg); + static v128 getSavedVectorRegister(A &addressSpace, const R ®isters, + pint_t cfa, const RegisterLocation &savedReg); + + static pint_t getCFA(A &addressSpace, const PrologInfo &prolog, + const R ®isters) { + if (prolog.cfaRegister != 0) + return (pint_t)((sint_t)registers.getRegister((int)prolog.cfaRegister) + + prolog.cfaRegisterOffset); + if (prolog.cfaExpression != 0) + return evaluateExpression((pint_t)prolog.cfaExpression, addressSpace, + registers, 0); + assert(0 && "getCFA(): unknown location"); + __builtin_unreachable(); + } +}; + + +template +typename A::pint_t DwarfInstructions::getSavedRegister( + A &addressSpace, const R ®isters, pint_t cfa, + const RegisterLocation &savedReg, + typename A::pint_t& location) { + switch (savedReg.location) { + case CFI_Parser::kRegisterInCFA: + location = cfa + (pint_t)savedReg.value; + return (pint_t)addressSpace.getP(location); + + case CFI_Parser::kRegisterAtExpression: + location = evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa); + return (pint_t)addressSpace.getP(location); + + case CFI_Parser::kRegisterIsExpression: + location = 0; + return evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa); + case 
CFI_Parser::kRegisterInRegister: + location = 0; + return registers.getRegister((int)savedReg.value); + + case CFI_Parser::kRegisterUnused: + case CFI_Parser::kRegisterOffsetFromCFA: + // FIX ME + break; + } + _LIBUNWIND_ABORT("unsupported restore location for register"); +} + +template +double DwarfInstructions::getSavedFloatRegister( + A &addressSpace, const R ®isters, pint_t cfa, + const RegisterLocation &savedReg) { + switch (savedReg.location) { + case CFI_Parser::kRegisterInCFA: + return addressSpace.getDouble(cfa + (pint_t)savedReg.value); + + case CFI_Parser::kRegisterAtExpression: + return addressSpace.getDouble( + evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa)); + + case CFI_Parser::kRegisterIsExpression: + case CFI_Parser::kRegisterUnused: + case CFI_Parser::kRegisterOffsetFromCFA: + case CFI_Parser::kRegisterInRegister: + // FIX ME + break; + } + _LIBUNWIND_ABORT("unsupported restore location for float register"); +} + +template +v128 DwarfInstructions::getSavedVectorRegister( + A &addressSpace, const R ®isters, pint_t cfa, + const RegisterLocation &savedReg) { + switch (savedReg.location) { + case CFI_Parser::kRegisterInCFA: + return addressSpace.getVector(cfa + (pint_t)savedReg.value); + + case CFI_Parser::kRegisterAtExpression: + return addressSpace.getVector( + evaluateExpression((pint_t)savedReg.value, addressSpace, + registers, cfa)); + + case CFI_Parser::kRegisterIsExpression: + case CFI_Parser::kRegisterUnused: + case CFI_Parser::kRegisterOffsetFromCFA: + case CFI_Parser::kRegisterInRegister: + // FIX ME + break; + } + _LIBUNWIND_ABORT("unsupported restore location for vector register"); +} + +template +int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, + pint_t fdeStart, R ®isters) { + FDE_Info fdeInfo; + CIE_Info cieInfo; + if (CFI_Parser::decodeFDE(addressSpace, fdeStart, &fdeInfo, + &cieInfo) == NULL) { + PrologInfo prolog; + if (CFI_Parser::parseFDEInstructions(addressSpace, fdeInfo, cieInfo, pc, + R::getArch(), &prolog)) { + // get pointer to cfa (architecture specific) + pint_t cfa = getCFA(addressSpace, prolog, registers); + + // restore registers that DWARF says were saved + R newRegisters = registers; + pint_t returnAddress = 0; + const int lastReg = R::lastDwarfRegNum(); + assert(static_cast(CFI_Parser::kMaxRegisterNumber) >= lastReg && + "register range too large"); + assert(lastReg >= (int)cieInfo.returnAddressRegister && + "register range does not contain return address register"); + for (int i = 0; i <= lastReg; ++i) { + if (prolog.savedRegisters[i].location != + CFI_Parser::kRegisterUnused) { + if (registers.validFloatRegister(i)) + newRegisters.setFloatRegister( + i, getSavedFloatRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i])); + else if (registers.validVectorRegister(i)) + newRegisters.setVectorRegister( + i, getSavedVectorRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i])); + else if (i == (int)cieInfo.returnAddressRegister) { + pint_t dummyLocation; + returnAddress = getSavedRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i], + dummyLocation); + } + else if (registers.validRegister(i)) { + pint_t value; + pint_t location; + value = getSavedRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i], + location); + + newRegisters.setRegister(i, value, location); + } + else + return UNW_EBADREG; + } + } + + // By definition, the CFA is the stack pointer at the call site, so + // restoring SP means setting it to CFA. 
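+      // (In this fork the register setters take an extra "location" argument
+      // recording where the value was restored from; 0 appears to be used
+      // when the value did not come from a memory slot.)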
+ newRegisters.setSP(cfa, 0); + +#if defined(_LIBUNWIND_TARGET_AARCH64) + // If the target is aarch64 then the return address may have been signed + // using the v8.3 pointer authentication extensions. The original + // return address needs to be authenticated before the return address is + // restored. autia1716 is used instead of autia as autia1716 assembles + // to a NOP on pre-v8.3a architectures. + if ((R::getArch() == REGISTERS_ARM64) && + prolog.savedRegisters[UNW_ARM64_RA_SIGN_STATE].value) { +#if !defined(_LIBUNWIND_IS_NATIVE_ONLY) + return UNW_ECROSSRASIGNING; +#else + register unsigned long long x17 __asm("x17") = returnAddress; + register unsigned long long x16 __asm("x16") = cfa; + + // These are the autia1716/autib1716 instructions. The hint instructions + // are used here as gcc does not assemble autia1716/autib1716 for pre + // armv8.3a targets. + if (cieInfo.addressesSignedWithBKey) + asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716 + else + asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716 + returnAddress = x17; +#endif + } +#endif + +#if defined(_LIBUNWIND_TARGET_SPARC) + if (R::getArch() == REGISTERS_SPARC) { + // Skip call site instruction and delay slot + returnAddress += 8; + // Skip unimp instruction if function returns a struct + if ((addressSpace.get32(returnAddress) & 0xC1C00000) == 0) + returnAddress += 4; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC64) +#define PPC64_ELFV1_R2_LOAD_INST_ENCODING 0xe8410028u // ld r2,40(r1) +#define PPC64_ELFV1_R2_OFFSET 40 +#define PPC64_ELFV2_R2_LOAD_INST_ENCODING 0xe8410018u // ld r2,24(r1) +#define PPC64_ELFV2_R2_OFFSET 24 + // If the instruction at return address is a TOC (r2) restore, + // then r2 was saved and needs to be restored. + // ELFv2 ABI specifies that the TOC Pointer must be saved at SP + 24, + // while in ELFv1 ABI it is saved at SP + 40. + if (R::getArch() == REGISTERS_PPC64 && returnAddress != 0) { + pint_t sp = newRegisters.getRegister(UNW_REG_SP); + pint_t r2 = 0; + switch (addressSpace.get32(returnAddress)) { + case PPC64_ELFV1_R2_LOAD_INST_ENCODING: + r2 = addressSpace.get64(sp + PPC64_ELFV1_R2_OFFSET); + break; + case PPC64_ELFV2_R2_LOAD_INST_ENCODING: + r2 = addressSpace.get64(sp + PPC64_ELFV2_R2_OFFSET); + break; + } + if (r2) + newRegisters.setRegister(UNW_PPC64_R2, r2); + } +#endif + + // Return address is address after call site instruction, so setting IP to + // that does simualates a return. + newRegisters.setIP(returnAddress, 0); + + // Simulate the step by replacing the register set with the new ones. 
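+      // Every restored value above was read against the original `registers`
+      // snapshot, so committing the whole set at once prevents one restored
+      // register from clobbering an input still needed for another.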
+ registers = newRegisters; + + return UNW_STEP_SUCCESS; + } + } + return UNW_EBADFRAME; +} + +template +typename A::pint_t +DwarfInstructions::evaluateExpression(pint_t expression, A &addressSpace, + const R ®isters, + pint_t initialStackValue) { + const bool log = false; + pint_t p = expression; + pint_t expressionEnd = expression + 20; // temp, until len read + pint_t length = (pint_t)addressSpace.getULEB128(p, expressionEnd); + expressionEnd = p + length; + if (log) + fprintf(stderr, "evaluateExpression(): length=%" PRIu64 "\n", + (uint64_t)length); + pint_t stack[100]; + pint_t *sp = stack; + *(++sp) = initialStackValue; + + while (p < expressionEnd) { + if (log) { + for (pint_t *t = sp; t > stack; --t) { + fprintf(stderr, "sp[] = 0x%" PRIx64 "\n", (uint64_t)(*t)); + } + } + uint8_t opcode = addressSpace.get8(p++); + sint_t svalue, svalue2; + pint_t value; + uint32_t reg; + switch (opcode) { + case DW_OP_addr: + // push immediate address sized value + value = addressSpace.getP(p); + p += sizeof(pint_t); + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_deref: + // pop stack, dereference, push result + value = *sp--; + *(++sp) = addressSpace.getP(value); + if (log) + fprintf(stderr, "dereference 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const1u: + // push immediate 1 byte value + value = addressSpace.get8(p); + p += 1; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const1s: + // push immediate 1 byte signed value + svalue = (int8_t) addressSpace.get8(p); + p += 1; + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_const2u: + // push immediate 2 byte value + value = addressSpace.get16(p); + p += 2; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const2s: + // push immediate 2 byte signed value + svalue = (int16_t) addressSpace.get16(p); + p += 2; + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_const4u: + // push immediate 4 byte value + value = addressSpace.get32(p); + p += 4; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const4s: + // push immediate 4 byte signed value + svalue = (int32_t)addressSpace.get32(p); + p += 4; + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_const8u: + // push immediate 8 byte value + value = (pint_t)addressSpace.get64(p); + p += 8; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_const8s: + // push immediate 8 byte signed value + value = (pint_t)addressSpace.get64(p); + p += 8; + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_constu: + // push immediate ULEB128 value + value = (pint_t)addressSpace.getULEB128(p, expressionEnd); + *(++sp) = value; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_consts: + // push immediate SLEB128 value + svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); + *(++sp) = (pint_t)svalue; + if (log) + fprintf(stderr, "push 0x%" PRIx64 "\n", (uint64_t)svalue); + break; + + case DW_OP_dup: + // push top of stack + value = *sp; + *(++sp) = value; + if 
(log) + fprintf(stderr, "duplicate top of stack\n"); + break; + + case DW_OP_drop: + // pop + --sp; + if (log) + fprintf(stderr, "pop top of stack\n"); + break; + + case DW_OP_over: + // dup second + value = sp[-1]; + *(++sp) = value; + if (log) + fprintf(stderr, "duplicate second in stack\n"); + break; + + case DW_OP_pick: + // pick from + reg = addressSpace.get8(p); + p += 1; + value = sp[-reg]; + *(++sp) = value; + if (log) + fprintf(stderr, "duplicate %d in stack\n", reg); + break; + + case DW_OP_swap: + // swap top two + value = sp[0]; + sp[0] = sp[-1]; + sp[-1] = value; + if (log) + fprintf(stderr, "swap top of stack\n"); + break; + + case DW_OP_rot: + // rotate top three + value = sp[0]; + sp[0] = sp[-1]; + sp[-1] = sp[-2]; + sp[-2] = value; + if (log) + fprintf(stderr, "rotate top three of stack\n"); + break; + + case DW_OP_xderef: + // pop stack, dereference, push result + value = *sp--; + *sp = *((pint_t*)value); + if (log) + fprintf(stderr, "x-dereference 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_abs: + svalue = (sint_t)*sp; + if (svalue < 0) + *sp = (pint_t)(-svalue); + if (log) + fprintf(stderr, "abs\n"); + break; + + case DW_OP_and: + value = *sp--; + *sp &= value; + if (log) + fprintf(stderr, "and\n"); + break; + + case DW_OP_div: + svalue = (sint_t)(*sp--); + svalue2 = (sint_t)*sp; + *sp = (pint_t)(svalue2 / svalue); + if (log) + fprintf(stderr, "div\n"); + break; + + case DW_OP_minus: + value = *sp--; + *sp = *sp - value; + if (log) + fprintf(stderr, "minus\n"); + break; + + case DW_OP_mod: + svalue = (sint_t)(*sp--); + svalue2 = (sint_t)*sp; + *sp = (pint_t)(svalue2 % svalue); + if (log) + fprintf(stderr, "module\n"); + break; + + case DW_OP_mul: + svalue = (sint_t)(*sp--); + svalue2 = (sint_t)*sp; + *sp = (pint_t)(svalue2 * svalue); + if (log) + fprintf(stderr, "mul\n"); + break; + + case DW_OP_neg: + *sp = 0 - *sp; + if (log) + fprintf(stderr, "neg\n"); + break; + + case DW_OP_not: + svalue = (sint_t)(*sp); + *sp = (pint_t)(~svalue); + if (log) + fprintf(stderr, "not\n"); + break; + + case DW_OP_or: + value = *sp--; + *sp |= value; + if (log) + fprintf(stderr, "or\n"); + break; + + case DW_OP_plus: + value = *sp--; + *sp += value; + if (log) + fprintf(stderr, "plus\n"); + break; + + case DW_OP_plus_uconst: + // pop stack, add uelb128 constant, push result + *sp += static_cast(addressSpace.getULEB128(p, expressionEnd)); + if (log) + fprintf(stderr, "add constant\n"); + break; + + case DW_OP_shl: + value = *sp--; + *sp = *sp << value; + if (log) + fprintf(stderr, "shift left\n"); + break; + + case DW_OP_shr: + value = *sp--; + *sp = *sp >> value; + if (log) + fprintf(stderr, "shift left\n"); + break; + + case DW_OP_shra: + value = *sp--; + svalue = (sint_t)*sp; + *sp = (pint_t)(svalue >> value); + if (log) + fprintf(stderr, "shift left arithmetric\n"); + break; + + case DW_OP_xor: + value = *sp--; + *sp ^= value; + if (log) + fprintf(stderr, "xor\n"); + break; + + case DW_OP_skip: + svalue = (int16_t) addressSpace.get16(p); + p += 2; + p = (pint_t)((sint_t)p + svalue); + if (log) + fprintf(stderr, "skip %" PRIu64 "\n", (uint64_t)svalue); + break; + + case DW_OP_bra: + svalue = (int16_t) addressSpace.get16(p); + p += 2; + if (*sp--) + p = (pint_t)((sint_t)p + svalue); + if (log) + fprintf(stderr, "bra %" PRIu64 "\n", (uint64_t)svalue); + break; + + case DW_OP_eq: + value = *sp--; + *sp = (*sp == value); + if (log) + fprintf(stderr, "eq\n"); + break; + + case DW_OP_ge: + value = *sp--; + *sp = (*sp >= value); + if (log) + fprintf(stderr, "ge\n"); + break; 
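+    // The remaining relational opcodes below (gt/le/lt/ne) follow the same
+    // pattern: pop one operand and replace the new top of stack with 0 or 1.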
+ + case DW_OP_gt: + value = *sp--; + *sp = (*sp > value); + if (log) + fprintf(stderr, "gt\n"); + break; + + case DW_OP_le: + value = *sp--; + *sp = (*sp <= value); + if (log) + fprintf(stderr, "le\n"); + break; + + case DW_OP_lt: + value = *sp--; + *sp = (*sp < value); + if (log) + fprintf(stderr, "lt\n"); + break; + + case DW_OP_ne: + value = *sp--; + *sp = (*sp != value); + if (log) + fprintf(stderr, "ne\n"); + break; + + case DW_OP_lit0: + case DW_OP_lit1: + case DW_OP_lit2: + case DW_OP_lit3: + case DW_OP_lit4: + case DW_OP_lit5: + case DW_OP_lit6: + case DW_OP_lit7: + case DW_OP_lit8: + case DW_OP_lit9: + case DW_OP_lit10: + case DW_OP_lit11: + case DW_OP_lit12: + case DW_OP_lit13: + case DW_OP_lit14: + case DW_OP_lit15: + case DW_OP_lit16: + case DW_OP_lit17: + case DW_OP_lit18: + case DW_OP_lit19: + case DW_OP_lit20: + case DW_OP_lit21: + case DW_OP_lit22: + case DW_OP_lit23: + case DW_OP_lit24: + case DW_OP_lit25: + case DW_OP_lit26: + case DW_OP_lit27: + case DW_OP_lit28: + case DW_OP_lit29: + case DW_OP_lit30: + case DW_OP_lit31: + value = static_cast(opcode - DW_OP_lit0); + *(++sp) = value; + if (log) + fprintf(stderr, "push literal 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_reg0: + case DW_OP_reg1: + case DW_OP_reg2: + case DW_OP_reg3: + case DW_OP_reg4: + case DW_OP_reg5: + case DW_OP_reg6: + case DW_OP_reg7: + case DW_OP_reg8: + case DW_OP_reg9: + case DW_OP_reg10: + case DW_OP_reg11: + case DW_OP_reg12: + case DW_OP_reg13: + case DW_OP_reg14: + case DW_OP_reg15: + case DW_OP_reg16: + case DW_OP_reg17: + case DW_OP_reg18: + case DW_OP_reg19: + case DW_OP_reg20: + case DW_OP_reg21: + case DW_OP_reg22: + case DW_OP_reg23: + case DW_OP_reg24: + case DW_OP_reg25: + case DW_OP_reg26: + case DW_OP_reg27: + case DW_OP_reg28: + case DW_OP_reg29: + case DW_OP_reg30: + case DW_OP_reg31: + reg = static_cast(opcode - DW_OP_reg0); + *(++sp) = registers.getRegister((int)reg); + if (log) + fprintf(stderr, "push reg %d\n", reg); + break; + + case DW_OP_regx: + reg = static_cast(addressSpace.getULEB128(p, expressionEnd)); + *(++sp) = registers.getRegister((int)reg); + if (log) + fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); + break; + + case DW_OP_breg0: + case DW_OP_breg1: + case DW_OP_breg2: + case DW_OP_breg3: + case DW_OP_breg4: + case DW_OP_breg5: + case DW_OP_breg6: + case DW_OP_breg7: + case DW_OP_breg8: + case DW_OP_breg9: + case DW_OP_breg10: + case DW_OP_breg11: + case DW_OP_breg12: + case DW_OP_breg13: + case DW_OP_breg14: + case DW_OP_breg15: + case DW_OP_breg16: + case DW_OP_breg17: + case DW_OP_breg18: + case DW_OP_breg19: + case DW_OP_breg20: + case DW_OP_breg21: + case DW_OP_breg22: + case DW_OP_breg23: + case DW_OP_breg24: + case DW_OP_breg25: + case DW_OP_breg26: + case DW_OP_breg27: + case DW_OP_breg28: + case DW_OP_breg29: + case DW_OP_breg30: + case DW_OP_breg31: + reg = static_cast(opcode - DW_OP_breg0); + svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); + svalue += static_cast(registers.getRegister((int)reg)); + *(++sp) = (pint_t)(svalue); + if (log) + fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); + break; + + case DW_OP_bregx: + reg = static_cast(addressSpace.getULEB128(p, expressionEnd)); + svalue = (sint_t)addressSpace.getSLEB128(p, expressionEnd); + svalue += static_cast(registers.getRegister((int)reg)); + *(++sp) = (pint_t)(svalue); + if (log) + fprintf(stderr, "push reg %d + 0x%" PRIx64 "\n", reg, (uint64_t)svalue); + break; + + case DW_OP_fbreg: + _LIBUNWIND_ABORT("DW_OP_fbreg not 
implemented"); + break; + + case DW_OP_piece: + _LIBUNWIND_ABORT("DW_OP_piece not implemented"); + break; + + case DW_OP_deref_size: + // pop stack, dereference, push result + value = *sp--; + switch (addressSpace.get8(p++)) { + case 1: + value = addressSpace.get8(value); + break; + case 2: + value = addressSpace.get16(value); + break; + case 4: + value = addressSpace.get32(value); + break; + case 8: + value = (pint_t)addressSpace.get64(value); + break; + default: + _LIBUNWIND_ABORT("DW_OP_deref_size with bad size"); + } + *(++sp) = value; + if (log) + fprintf(stderr, "sized dereference 0x%" PRIx64 "\n", (uint64_t)value); + break; + + case DW_OP_xderef_size: + case DW_OP_nop: + case DW_OP_push_object_addres: + case DW_OP_call2: + case DW_OP_call4: + case DW_OP_call_ref: + default: + _LIBUNWIND_ABORT("DWARF opcode not implemented"); + } + + } + if (log) + fprintf(stderr, "expression evaluates to 0x%" PRIx64 "\n", (uint64_t)*sp); + return *sp; +} + + + +} // namespace libunwind + +#endif // __DWARF_INSTRUCTIONS_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/DwarfParser.hpp b/src/coreclr/src/nativeaot/libunwind/src/DwarfParser.hpp new file mode 100644 index 0000000000000..a2ebf3bb0e189 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/DwarfParser.hpp @@ -0,0 +1,766 @@ +//===--------------------------- DwarfParser.hpp --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Parses DWARF CFIs (FDEs and CIEs). +// +//===----------------------------------------------------------------------===// + +#ifndef __DWARF_PARSER_HPP__ +#define __DWARF_PARSER_HPP__ +#define __STDC_FORMAT_MACROS + +#include +#include +#include +#include + +#include "libunwind.h" +#include "dwarf2.h" +#include "Registers.hpp" + +#include "config.h" + +namespace libunwind { + +/// CFI_Parser does basic parsing of a CFI (Call Frame Information) records. 
+/// See DWARF Spec for details: +/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html +/// +template +class CFI_Parser { +public: + typedef typename A::pint_t pint_t; + + /// Information encoded in a CIE (Common Information Entry) + struct CIE_Info { + pint_t cieStart; + pint_t cieLength; + pint_t cieInstructions; + uint8_t pointerEncoding; + uint8_t lsdaEncoding; + uint8_t personalityEncoding; + uint8_t personalityOffsetInCIE; + pint_t personality; + uint32_t codeAlignFactor; + int dataAlignFactor; + bool isSignalFrame; + bool fdesHaveAugmentationData; + uint8_t returnAddressRegister; +#if defined(_LIBUNWIND_TARGET_AARCH64) + bool addressesSignedWithBKey; +#endif + }; + + /// Information about an FDE (Frame Description Entry) + struct FDE_Info { + pint_t fdeStart; + pint_t fdeLength; + pint_t fdeInstructions; + pint_t pcStart; + pint_t pcEnd; + pint_t lsda; + }; + + enum { + kMaxRegisterNumber = _LIBUNWIND_HIGHEST_DWARF_REGISTER + }; + enum RegisterSavedWhere { + kRegisterUnused, + kRegisterInCFA, + kRegisterOffsetFromCFA, + kRegisterInRegister, + kRegisterAtExpression, + kRegisterIsExpression + }; + struct RegisterLocation { + RegisterSavedWhere location; + int64_t value; + }; + /// Information about a frame layout and registers saved determined + /// by "running" the DWARF FDE "instructions" + struct PrologInfo { + uint32_t cfaRegister; + int32_t cfaRegisterOffset; // CFA = (cfaRegister)+cfaRegisterOffset + int64_t cfaExpression; // CFA = expression + uint32_t spExtraArgSize; + uint32_t codeOffsetAtStackDecrement; + bool registersInOtherRegisters; + bool sameValueUsed; + RegisterLocation savedRegisters[kMaxRegisterNumber + 1]; + }; + + struct PrologInfoStackEntry { + PrologInfoStackEntry(PrologInfoStackEntry *n, const PrologInfo &i) + : next(n), info(i) {} + PrologInfoStackEntry *next; + PrologInfo info; + }; + + static bool findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart, + uint32_t sectionLength, pint_t fdeHint, FDE_Info *fdeInfo, + CIE_Info *cieInfo); + static const char *decodeFDE(A &addressSpace, pint_t fdeStart, + FDE_Info *fdeInfo, CIE_Info *cieInfo); + static bool parseFDEInstructions(A &addressSpace, const FDE_Info &fdeInfo, + const CIE_Info &cieInfo, pint_t upToPC, + int arch, PrologInfo *results); + + static const char *parseCIE(A &addressSpace, pint_t cie, CIE_Info *cieInfo); + +private: + static bool parseInstructions(A &addressSpace, pint_t instructions, + pint_t instructionsEnd, const CIE_Info &cieInfo, + pint_t pcoffset, + PrologInfoStackEntry *&rememberStack, int arch, + PrologInfo *results); +}; + +/// Parse a FDE into a CIE_Info and an FDE_Info +template +const char *CFI_Parser::decodeFDE(A &addressSpace, pint_t fdeStart, + FDE_Info *fdeInfo, CIE_Info *cieInfo) { + pint_t p = fdeStart; + pint_t cfiLength = (pint_t)addressSpace.get32(p); + p += 4; + if (cfiLength == 0xffffffff) { + // 0xffffffff means length is really next 8 bytes + cfiLength = (pint_t)addressSpace.get64(p); + p += 8; + } + if (cfiLength == 0) + return "FDE has zero length"; // end marker + uint32_t ciePointer = addressSpace.get32(p); + if (ciePointer == 0) + return "FDE is really a CIE"; // this is a CIE not an FDE + pint_t nextCFI = p + cfiLength; + pint_t cieStart = p - ciePointer; + const char *err = parseCIE(addressSpace, cieStart, cieInfo); + if (err != NULL) + return err; + p += 4; + // Parse pc begin and range. 
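+  // pcStart is read with the CIE's full pointer encoding; pcRange masks off
+  // the upper nibble so the length is decoded as a plain value rather than
+  // pc-relative or indirect.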
+ pint_t pcStart = + addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding); + pint_t pcRange = + addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding & 0x0F); + // Parse rest of info. + fdeInfo->lsda = 0; + // Check for augmentation length. + if (cieInfo->fdesHaveAugmentationData) { + pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI); + pint_t endOfAug = p + augLen; + if (cieInfo->lsdaEncoding != DW_EH_PE_omit) { + // Peek at value (without indirection). Zero means no LSDA. + pint_t lsdaStart = p; + if (addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding & 0x0F) != + 0) { + // Reset pointer and re-parse LSDA address. + p = lsdaStart; + fdeInfo->lsda = + addressSpace.getEncodedP(p, nextCFI, cieInfo->lsdaEncoding); + } + } + p = endOfAug; + } + fdeInfo->fdeStart = fdeStart; + fdeInfo->fdeLength = nextCFI - fdeStart; + fdeInfo->fdeInstructions = p; + fdeInfo->pcStart = pcStart; + fdeInfo->pcEnd = pcStart + pcRange; + return NULL; // success +} + +/// Scan an eh_frame section to find an FDE for a pc +template +bool CFI_Parser::findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart, + uint32_t sectionLength, pint_t fdeHint, + FDE_Info *fdeInfo, CIE_Info *cieInfo) { + //fprintf(stderr, "findFDE(0x%llX)\n", (long long)pc); + pint_t p = (fdeHint != 0) ? fdeHint : ehSectionStart; + const pint_t ehSectionEnd = p + sectionLength; + while (p < ehSectionEnd) { + pint_t currentCFI = p; + //fprintf(stderr, "findFDE() CFI at 0x%llX\n", (long long)p); + pint_t cfiLength = addressSpace.get32(p); + p += 4; + if (cfiLength == 0xffffffff) { + // 0xffffffff means length is really next 8 bytes + cfiLength = (pint_t)addressSpace.get64(p); + p += 8; + } + if (cfiLength == 0) + return false; // end marker + uint32_t id = addressSpace.get32(p); + if (id == 0) { + // Skip over CIEs. + p += cfiLength; + } else { + // Process FDE to see if it covers pc. + pint_t nextCFI = p + cfiLength; + uint32_t ciePointer = addressSpace.get32(p); + pint_t cieStart = p - ciePointer; + // Validate pointer to CIE is within section. + if ((ehSectionStart <= cieStart) && (cieStart < ehSectionEnd)) { + if (parseCIE(addressSpace, cieStart, cieInfo) == NULL) { + p += 4; + // Parse pc begin and range. + pint_t pcStart = + addressSpace.getEncodedP(p, nextCFI, cieInfo->pointerEncoding); + pint_t pcRange = addressSpace.getEncodedP( + p, nextCFI, cieInfo->pointerEncoding & 0x0F); + // Test if pc is within the function this FDE covers. + if ((pcStart < pc) && (pc <= pcStart + pcRange)) { + // parse rest of info + fdeInfo->lsda = 0; + // check for augmentation length + if (cieInfo->fdesHaveAugmentationData) { + pint_t augLen = (pint_t)addressSpace.getULEB128(p, nextCFI); + pint_t endOfAug = p + augLen; + if (cieInfo->lsdaEncoding != DW_EH_PE_omit) { + // Peek at value (without indirection). Zero means no LSDA. + pint_t lsdaStart = p; + if (addressSpace.getEncodedP( + p, nextCFI, cieInfo->lsdaEncoding & 0x0F) != 0) { + // Reset pointer and re-parse LSDA address. + p = lsdaStart; + fdeInfo->lsda = addressSpace + .getEncodedP(p, nextCFI, cieInfo->lsdaEncoding); + } + } + p = endOfAug; + } + fdeInfo->fdeStart = currentCFI; + fdeInfo->fdeLength = nextCFI - currentCFI; + fdeInfo->fdeInstructions = p; + fdeInfo->pcStart = pcStart; + fdeInfo->pcEnd = pcStart + pcRange; + return true; + } else { + // pc is not in begin/range, skip this FDE + } + } else { + // Malformed CIE, now augmentation describing pc range encoding. + } + } else { + // malformed FDE. 
CIE is bad + } + p = nextCFI; + } + } + return false; +} + +/// Extract info from a CIE +template +const char *CFI_Parser::parseCIE(A &addressSpace, pint_t cie, + CIE_Info *cieInfo) { + cieInfo->pointerEncoding = 0; + cieInfo->lsdaEncoding = DW_EH_PE_omit; + cieInfo->personalityEncoding = 0; + cieInfo->personalityOffsetInCIE = 0; + cieInfo->personality = 0; + cieInfo->codeAlignFactor = 0; + cieInfo->dataAlignFactor = 0; + cieInfo->isSignalFrame = false; + cieInfo->fdesHaveAugmentationData = false; +#if defined(_LIBUNWIND_TARGET_AARCH64) + cieInfo->addressesSignedWithBKey = false; +#endif + cieInfo->cieStart = cie; + pint_t p = cie; + pint_t cieLength = (pint_t)addressSpace.get32(p); + p += 4; + pint_t cieContentEnd = p + cieLength; + if (cieLength == 0xffffffff) { + // 0xffffffff means length is really next 8 bytes + cieLength = (pint_t)addressSpace.get64(p); + p += 8; + cieContentEnd = p + cieLength; + } + if (cieLength == 0) + return NULL; + // CIE ID is always 0 + if (addressSpace.get32(p) != 0) + return "CIE ID is not zero"; + p += 4; + // Version is always 1 or 3 + uint8_t version = addressSpace.get8(p); + if ((version != 1) && (version != 3)) + return "CIE version is not 1 or 3"; + ++p; + // save start of augmentation string and find end + pint_t strStart = p; + while (addressSpace.get8(p) != 0) + ++p; + ++p; + // parse code aligment factor + cieInfo->codeAlignFactor = (uint32_t)addressSpace.getULEB128(p, cieContentEnd); + // parse data alignment factor + cieInfo->dataAlignFactor = (int)addressSpace.getSLEB128(p, cieContentEnd); + // parse return address register + uint64_t raReg = addressSpace.getULEB128(p, cieContentEnd); + assert(raReg < 255 && "return address register too large"); + cieInfo->returnAddressRegister = (uint8_t)raReg; + // parse augmentation data based on augmentation string + const char *result = NULL; + if (addressSpace.get8(strStart) == 'z') { + // parse augmentation data length + addressSpace.getULEB128(p, cieContentEnd); + for (pint_t s = strStart; addressSpace.get8(s) != '\0'; ++s) { + switch (addressSpace.get8(s)) { + case 'z': + cieInfo->fdesHaveAugmentationData = true; + break; + case 'P': + cieInfo->personalityEncoding = addressSpace.get8(p); + ++p; + cieInfo->personalityOffsetInCIE = (uint8_t)(p - cie); + cieInfo->personality = addressSpace + .getEncodedP(p, cieContentEnd, cieInfo->personalityEncoding); + break; + case 'L': + cieInfo->lsdaEncoding = addressSpace.get8(p); + ++p; + break; + case 'R': + cieInfo->pointerEncoding = addressSpace.get8(p); + ++p; + break; + case 'S': + cieInfo->isSignalFrame = true; + break; +#if defined(_LIBUNWIND_TARGET_AARCH64) + case 'B': + cieInfo->addressesSignedWithBKey = true; + break; +#endif + default: + // ignore unknown letters + break; + } + } + } + cieInfo->cieLength = cieContentEnd - cieInfo->cieStart; + cieInfo->cieInstructions = p; + return result; +} + + +/// "run" the DWARF instructions and create the abstact PrologInfo for an FDE +template +bool CFI_Parser::parseFDEInstructions(A &addressSpace, + const FDE_Info &fdeInfo, + const CIE_Info &cieInfo, pint_t upToPC, + int arch, PrologInfo *results) { + // clear results + memset(results, '\0', sizeof(PrologInfo)); + PrologInfoStackEntry *rememberStack = NULL; + + // parse CIE then FDE instructions + return parseInstructions(addressSpace, cieInfo.cieInstructions, + cieInfo.cieStart + cieInfo.cieLength, cieInfo, + (pint_t)(-1), rememberStack, arch, results) && + parseInstructions(addressSpace, fdeInfo.fdeInstructions, + fdeInfo.fdeStart + fdeInfo.fdeLength, cieInfo, + 
upToPC - fdeInfo.pcStart, rememberStack, arch, + results); +} + +/// "run" the DWARF instructions +template +bool CFI_Parser::parseInstructions(A &addressSpace, pint_t instructions, + pint_t instructionsEnd, + const CIE_Info &cieInfo, pint_t pcoffset, + PrologInfoStackEntry *&rememberStack, + int arch, PrologInfo *results) { + pint_t p = instructions; + pint_t codeOffset = 0; + PrologInfo initialState = *results; + + _LIBUNWIND_TRACE_DWARF("parseInstructions(instructions=0x%0" PRIx64 ")\n", + static_cast(instructionsEnd)); + + // see DWARF Spec, section 6.4.2 for details on unwind opcodes + while ((p < instructionsEnd) && (codeOffset < pcoffset)) { + uint64_t reg; + uint64_t reg2; + int64_t offset; + uint64_t length; + uint8_t opcode = addressSpace.get8(p); + uint8_t operand; +#if !defined(_LIBUNWIND_NO_HEAP) + PrologInfoStackEntry *entry; +#endif + ++p; + switch (opcode) { + case DW_CFA_nop: + _LIBUNWIND_TRACE_DWARF("DW_CFA_nop\n"); + break; + case DW_CFA_set_loc: + codeOffset = + addressSpace.getEncodedP(p, instructionsEnd, cieInfo.pointerEncoding); + _LIBUNWIND_TRACE_DWARF("DW_CFA_set_loc\n"); + break; + case DW_CFA_advance_loc1: + codeOffset += (addressSpace.get8(p) * cieInfo.codeAlignFactor); + p += 1; + _LIBUNWIND_TRACE_DWARF("DW_CFA_advance_loc1: new offset=%" PRIu64 "\n", + static_cast(codeOffset)); + break; + case DW_CFA_advance_loc2: + codeOffset += (addressSpace.get16(p) * cieInfo.codeAlignFactor); + p += 2; + _LIBUNWIND_TRACE_DWARF("DW_CFA_advance_loc2: new offset=%" PRIu64 "\n", + static_cast(codeOffset)); + break; + case DW_CFA_advance_loc4: + codeOffset += (addressSpace.get32(p) * cieInfo.codeAlignFactor); + p += 4; + _LIBUNWIND_TRACE_DWARF("DW_CFA_advance_loc4: new offset=%" PRIu64 "\n", + static_cast(codeOffset)); + break; + case DW_CFA_offset_extended: + reg = addressSpace.getULEB128(p, instructionsEnd); + offset = (int64_t)addressSpace.getULEB128(p, instructionsEnd) + * cieInfo.dataAlignFactor; + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_offset_extended DWARF unwind, reg too big"); + return false; + } + results->savedRegisters[reg].location = kRegisterInCFA; + results->savedRegisters[reg].value = offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_offset_extended(reg=%" PRIu64 ", " + "offset=%" PRId64 ")\n", + reg, offset); + break; + case DW_CFA_restore_extended: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_restore_extended DWARF unwind, reg too big"); + return false; + } + results->savedRegisters[reg] = initialState.savedRegisters[reg]; + _LIBUNWIND_TRACE_DWARF("DW_CFA_restore_extended(reg=%" PRIu64 ")\n", reg); + break; + case DW_CFA_undefined: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_undefined DWARF unwind, reg too big"); + return false; + } + results->savedRegisters[reg].location = kRegisterUnused; + _LIBUNWIND_TRACE_DWARF("DW_CFA_undefined(reg=%" PRIu64 ")\n", reg); + break; + case DW_CFA_same_value: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_same_value DWARF unwind, reg too big"); + return false; + } + // DW_CFA_same_value unsupported + // "same value" means register was stored in frame, but its current + // value has not changed, so no need to restore from frame. + // We model this as if the register was never saved. 
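+      // (Unlike DW_CFA_undefined, the caller's value is still live in the
+      // register; there is simply nothing to restore, so kRegisterUnused is
+      // an adequate model for unwinding.)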
+ results->savedRegisters[reg].location = kRegisterUnused; + // set flag to disable conversion to compact unwind + results->sameValueUsed = true; + _LIBUNWIND_TRACE_DWARF("DW_CFA_same_value(reg=%" PRIu64 ")\n", reg); + break; + case DW_CFA_register: + reg = addressSpace.getULEB128(p, instructionsEnd); + reg2 = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_register DWARF unwind, reg too big"); + return false; + } + if (reg2 > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_register DWARF unwind, reg2 too big"); + return false; + } + results->savedRegisters[reg].location = kRegisterInRegister; + results->savedRegisters[reg].value = (int64_t)reg2; + // set flag to disable conversion to compact unwind + results->registersInOtherRegisters = true; + _LIBUNWIND_TRACE_DWARF( + "DW_CFA_register(reg=%" PRIu64 ", reg2=%" PRIu64 ")\n", reg, reg2); + break; +#if !defined(_LIBUNWIND_NO_HEAP) + case DW_CFA_remember_state: + // avoid operator new, because that would be an upward dependency + entry = (PrologInfoStackEntry *)malloc(sizeof(PrologInfoStackEntry)); + if (entry != NULL) { + entry->next = rememberStack; + entry->info = *results; + rememberStack = entry; + } else { + return false; + } + _LIBUNWIND_TRACE_DWARF("DW_CFA_remember_state\n"); + break; + case DW_CFA_restore_state: + if (rememberStack != NULL) { + PrologInfoStackEntry *top = rememberStack; + *results = top->info; + rememberStack = top->next; + free((char *)top); + } else { + return false; + } + _LIBUNWIND_TRACE_DWARF("DW_CFA_restore_state\n"); + break; +#endif + case DW_CFA_def_cfa: + reg = addressSpace.getULEB128(p, instructionsEnd); + offset = (int64_t)addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0("malformed DW_CFA_def_cfa DWARF unwind, reg too big"); + return false; + } + results->cfaRegister = (uint32_t)reg; + results->cfaRegisterOffset = (int32_t)offset; + _LIBUNWIND_TRACE_DWARF( + "DW_CFA_def_cfa(reg=%" PRIu64 ", offset=%" PRIu64 ")\n", reg, offset); + break; + case DW_CFA_def_cfa_register: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_def_cfa_register DWARF unwind, reg too big"); + return false; + } + results->cfaRegister = (uint32_t)reg; + _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_register(%" PRIu64 ")\n", reg); + break; + case DW_CFA_def_cfa_offset: + results->cfaRegisterOffset = (int32_t) + addressSpace.getULEB128(p, instructionsEnd); + results->codeOffsetAtStackDecrement = (uint32_t)codeOffset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_offset(%d)\n", + results->cfaRegisterOffset); + break; + case DW_CFA_def_cfa_expression: + results->cfaRegister = 0; + results->cfaExpression = (int64_t)p; + length = addressSpace.getULEB128(p, instructionsEnd); + assert(length < static_cast(~0) && "pointer overflow"); + p += static_cast(length); + _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_expression(expression=0x%" PRIx64 + ", length=%" PRIu64 ")\n", + results->cfaExpression, length); + break; + case DW_CFA_expression: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_expression DWARF unwind, reg too big"); + return false; + } + results->savedRegisters[reg].location = kRegisterAtExpression; + results->savedRegisters[reg].value = (int64_t)p; + length = addressSpace.getULEB128(p, instructionsEnd); + assert(length < static_cast(~0) && "pointer overflow"); + p += 
static_cast(length); + _LIBUNWIND_TRACE_DWARF("DW_CFA_expression(reg=%" PRIu64 ", " + "expression=0x%" PRIx64 ", " + "length=%" PRIu64 ")\n", + reg, results->savedRegisters[reg].value, length); + break; + case DW_CFA_offset_extended_sf: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_offset_extended_sf DWARF unwind, reg too big"); + return false; + } + offset = + addressSpace.getSLEB128(p, instructionsEnd) * cieInfo.dataAlignFactor; + results->savedRegisters[reg].location = kRegisterInCFA; + results->savedRegisters[reg].value = offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_offset_extended_sf(reg=%" PRIu64 ", " + "offset=%" PRId64 ")\n", + reg, offset); + break; + case DW_CFA_def_cfa_sf: + reg = addressSpace.getULEB128(p, instructionsEnd); + offset = + addressSpace.getSLEB128(p, instructionsEnd) * cieInfo.dataAlignFactor; + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_def_cfa_sf DWARF unwind, reg too big"); + return false; + } + results->cfaRegister = (uint32_t)reg; + results->cfaRegisterOffset = (int32_t)offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_sf(reg=%" PRIu64 ", " + "offset=%" PRId64 ")\n", + reg, offset); + break; + case DW_CFA_def_cfa_offset_sf: + results->cfaRegisterOffset = (int32_t) + (addressSpace.getSLEB128(p, instructionsEnd) * cieInfo.dataAlignFactor); + results->codeOffsetAtStackDecrement = (uint32_t)codeOffset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_def_cfa_offset_sf(%d)\n", + results->cfaRegisterOffset); + break; + case DW_CFA_val_offset: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG( + "malformed DW_CFA_val_offset DWARF unwind, reg (%" PRIu64 + ") out of range\n", + reg); + return false; + } + offset = (int64_t)addressSpace.getULEB128(p, instructionsEnd) + * cieInfo.dataAlignFactor; + results->savedRegisters[reg].location = kRegisterOffsetFromCFA; + results->savedRegisters[reg].value = offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_val_offset(reg=%" PRIu64 ", " + "offset=%" PRId64 "\n", + reg, offset); + break; + case DW_CFA_val_offset_sf: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_val_offset_sf DWARF unwind, reg too big"); + return false; + } + offset = + addressSpace.getSLEB128(p, instructionsEnd) * cieInfo.dataAlignFactor; + results->savedRegisters[reg].location = kRegisterOffsetFromCFA; + results->savedRegisters[reg].value = offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_val_offset_sf(reg=%" PRIu64 ", " + "offset=%" PRId64 "\n", + reg, offset); + break; + case DW_CFA_val_expression: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0( + "malformed DW_CFA_val_expression DWARF unwind, reg too big"); + return false; + } + results->savedRegisters[reg].location = kRegisterIsExpression; + results->savedRegisters[reg].value = (int64_t)p; + length = addressSpace.getULEB128(p, instructionsEnd); + assert(length < static_cast(~0) && "pointer overflow"); + p += static_cast(length); + _LIBUNWIND_TRACE_DWARF("DW_CFA_val_expression(reg=%" PRIu64 ", " + "expression=0x%" PRIx64 ", length=%" PRIu64 ")\n", + reg, results->savedRegisters[reg].value, length); + break; + case DW_CFA_GNU_args_size: + length = addressSpace.getULEB128(p, instructionsEnd); + results->spExtraArgSize = (uint32_t)length; + _LIBUNWIND_TRACE_DWARF("DW_CFA_GNU_args_size(%" PRIu64 ")\n", length); + break; + case 
DW_CFA_GNU_negative_offset_extended: + reg = addressSpace.getULEB128(p, instructionsEnd); + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG0("malformed DW_CFA_GNU_negative_offset_extended DWARF " + "unwind, reg too big"); + return false; + } + offset = (int64_t)addressSpace.getULEB128(p, instructionsEnd) + * cieInfo.dataAlignFactor; + results->savedRegisters[reg].location = kRegisterInCFA; + results->savedRegisters[reg].value = -offset; + _LIBUNWIND_TRACE_DWARF( + "DW_CFA_GNU_negative_offset_extended(%" PRId64 ")\n", offset); + break; + +#if defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_SPARC) + // The same constant is used to represent different instructions on + // AArch64 (negate_ra_state) and SPARC (window_save). + static_assert(DW_CFA_AARCH64_negate_ra_state == DW_CFA_GNU_window_save, + "uses the same constant"); + case DW_CFA_AARCH64_negate_ra_state: + switch (arch) { +#if defined(_LIBUNWIND_TARGET_AARCH64) + case REGISTERS_ARM64: + results->savedRegisters[UNW_ARM64_RA_SIGN_STATE].value ^= 0x1; + _LIBUNWIND_TRACE_DWARF("DW_CFA_AARCH64_negate_ra_state\n"); + break; +#endif +#if defined(_LIBUNWIND_TARGET_SPARC) + // case DW_CFA_GNU_window_save: + case REGISTERS_SPARC: + _LIBUNWIND_TRACE_DWARF("DW_CFA_GNU_window_save()\n"); + for (reg = UNW_SPARC_O0; reg <= UNW_SPARC_O7; reg++) { + results->savedRegisters[reg].location = kRegisterInRegister; + results->savedRegisters[reg].value = + ((int64_t)reg - UNW_SPARC_O0) + UNW_SPARC_I0; + } + + for (reg = UNW_SPARC_L0; reg <= UNW_SPARC_I7; reg++) { + results->savedRegisters[reg].location = kRegisterInCFA; + results->savedRegisters[reg].value = + ((int64_t)reg - UNW_SPARC_L0) * 4; + } + break; +#endif + } + break; +#else + (void)arch; +#endif + + default: + operand = opcode & 0x3F; + switch (opcode & 0xC0) { + case DW_CFA_offset: + reg = operand; + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG("malformed DW_CFA_offset DWARF unwind, reg (%" PRIu64 + ") out of range", + reg); + return false; + } + offset = (int64_t)addressSpace.getULEB128(p, instructionsEnd) + * cieInfo.dataAlignFactor; + results->savedRegisters[reg].location = kRegisterInCFA; + results->savedRegisters[reg].value = offset; + _LIBUNWIND_TRACE_DWARF("DW_CFA_offset(reg=%d, offset=%" PRId64 ")\n", + operand, offset); + break; + case DW_CFA_advance_loc: + codeOffset += operand * cieInfo.codeAlignFactor; + _LIBUNWIND_TRACE_DWARF("DW_CFA_advance_loc: new offset=%" PRIu64 "\n", + static_cast(codeOffset)); + break; + case DW_CFA_restore: + reg = operand; + if (reg > kMaxRegisterNumber) { + _LIBUNWIND_LOG("malformed DW_CFA_restore DWARF unwind, reg (%" PRIu64 + ") out of range", + reg); + return false; + } + results->savedRegisters[reg] = initialState.savedRegisters[reg]; + _LIBUNWIND_TRACE_DWARF("DW_CFA_restore(reg=%" PRIu64 ")\n", + static_cast(operand)); + break; + default: + _LIBUNWIND_TRACE_DWARF("unknown CFA opcode 0x%02X\n", opcode); + return false; + } + } + } + + return true; +} + +} // namespace libunwind + +#endif // __DWARF_PARSER_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/EHHeaderParser.hpp b/src/coreclr/src/nativeaot/libunwind/src/EHHeaderParser.hpp new file mode 100644 index 0000000000000..0101835b8e63d --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/EHHeaderParser.hpp @@ -0,0 +1,167 @@ +//===------------------------- EHHeaderParser.hpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Parses ELF .eh_frame_hdr sections. +// +//===----------------------------------------------------------------------===// + +#ifndef __EHHEADERPARSER_HPP__ +#define __EHHEADERPARSER_HPP__ + +#include "libunwind.h" + +#include "DwarfParser.hpp" + +namespace libunwind { + +/// \brief EHHeaderParser does basic parsing of an ELF .eh_frame_hdr section. +/// +/// See DWARF spec for details: +/// http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html +/// +template class EHHeaderParser { +public: + typedef typename A::pint_t pint_t; + + /// Information encoded in the EH frame header. + struct EHHeaderInfo { + pint_t eh_frame_ptr; + size_t fde_count; + pint_t table; + uint8_t table_enc; + }; + + static bool decodeEHHdr(A &addressSpace, pint_t ehHdrStart, pint_t ehHdrEnd, + EHHeaderInfo &ehHdrInfo); + static bool findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart, + uint32_t sectionLength, + typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo); + +private: + static bool decodeTableEntry(A &addressSpace, pint_t &tableEntry, + pint_t ehHdrStart, pint_t ehHdrEnd, + uint8_t tableEnc, + typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo); + static size_t getTableEntrySize(uint8_t tableEnc); +}; + +template +bool EHHeaderParser::decodeEHHdr(A &addressSpace, pint_t ehHdrStart, + pint_t ehHdrEnd, EHHeaderInfo &ehHdrInfo) { + pint_t p = ehHdrStart; + uint8_t version = addressSpace.get8(p++); + if (version != 1) { + _LIBUNWIND_LOG0("Unsupported .eh_frame_hdr version"); + return false; + } + + uint8_t eh_frame_ptr_enc = addressSpace.get8(p++); + uint8_t fde_count_enc = addressSpace.get8(p++); + ehHdrInfo.table_enc = addressSpace.get8(p++); + + ehHdrInfo.eh_frame_ptr = + addressSpace.getEncodedP(p, ehHdrEnd, eh_frame_ptr_enc, ehHdrStart); + ehHdrInfo.fde_count = + fde_count_enc == DW_EH_PE_omit + ? 0 + : addressSpace.getEncodedP(p, ehHdrEnd, fde_count_enc, ehHdrStart); + ehHdrInfo.table = p; + + return true; +} + +template +bool EHHeaderParser::decodeTableEntry( + A &addressSpace, pint_t &tableEntry, pint_t ehHdrStart, pint_t ehHdrEnd, + uint8_t tableEnc, typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo) { + // Have to decode the whole FDE for the PC range anyway, so just throw away + // the PC start. 
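+  // Each binary-search table entry is an (initial PC, FDE address) pair,
+  // both encoded with the header's table_enc (typically datarel, relative to
+  // the start of .eh_frame_hdr).  The first read below skips the PC field;
+  // the second yields the FDE pointer that is handed to decodeFDE.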
+ addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart); + pint_t fde = + addressSpace.getEncodedP(tableEntry, ehHdrEnd, tableEnc, ehHdrStart); + const char *message = + CFI_Parser::decodeFDE(addressSpace, fde, fdeInfo, cieInfo); + if (message != NULL) { + _LIBUNWIND_DEBUG_LOG("EHHeaderParser::decodeTableEntry: bad fde: %s", + message); + return false; + } + + return true; +} + +template +bool EHHeaderParser::findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart, + uint32_t sectionLength, + typename CFI_Parser::FDE_Info *fdeInfo, + typename CFI_Parser::CIE_Info *cieInfo) { + pint_t ehHdrEnd = ehHdrStart + sectionLength; + + EHHeaderParser::EHHeaderInfo hdrInfo; + if (!EHHeaderParser::decodeEHHdr(addressSpace, ehHdrStart, ehHdrEnd, + hdrInfo)) + return false; + + size_t tableEntrySize = getTableEntrySize(hdrInfo.table_enc); + pint_t tableEntry; + + size_t low = 0; + for (size_t len = hdrInfo.fde_count; len > 1;) { + size_t mid = low + (len / 2); + tableEntry = hdrInfo.table + mid * tableEntrySize; + pint_t start = addressSpace.getEncodedP(tableEntry, ehHdrEnd, + hdrInfo.table_enc, ehHdrStart); + + if (start == pc) { + low = mid; + break; + } else if (start < pc) { + low = mid; + len -= (len / 2); + } else { + len /= 2; + } + } + + tableEntry = hdrInfo.table + low * tableEntrySize; + if (decodeTableEntry(addressSpace, tableEntry, ehHdrStart, ehHdrEnd, + hdrInfo.table_enc, fdeInfo, cieInfo)) { + if (pc >= fdeInfo->pcStart && pc < fdeInfo->pcEnd) + return true; + } + + return false; +} + +template +size_t EHHeaderParser::getTableEntrySize(uint8_t tableEnc) { + switch (tableEnc & 0x0f) { + case DW_EH_PE_sdata2: + case DW_EH_PE_udata2: + return 4; + case DW_EH_PE_sdata4: + case DW_EH_PE_udata4: + return 8; + case DW_EH_PE_sdata8: + case DW_EH_PE_udata8: + return 16; + case DW_EH_PE_sleb128: + case DW_EH_PE_uleb128: + _LIBUNWIND_ABORT("Can't binary search on variable length encoded data."); + case DW_EH_PE_omit: + return 0; + default: + _LIBUNWIND_ABORT("Unknown DWARF encoding for search table."); + } +} + +} + +#endif diff --git a/src/coreclr/src/nativeaot/libunwind/src/RWMutex.hpp b/src/coreclr/src/nativeaot/libunwind/src/RWMutex.hpp new file mode 100644 index 0000000000000..a37ac77144f38 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/RWMutex.hpp @@ -0,0 +1,114 @@ +//===----------------------------- Registers.hpp --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Abstract interface to shared reader/writer log, hiding platform and +// configuration differences. 
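+// In the unwinder this guards state that is read often but updated rarely,
+// such as the cached DWARF FDE lookups.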
+// +//===----------------------------------------------------------------------===// + +#ifndef __RWMUTEX_HPP__ +#define __RWMUTEX_HPP__ + +#if defined(_WIN32) +#include +#elif !defined(_LIBUNWIND_HAS_NO_THREADS) +#include +#if defined(__unix__) && defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +#pragma comment(lib, "pthread") +#endif +#endif + +namespace libunwind { + +#if defined(_LIBUNWIND_HAS_NO_THREADS) + +class _LIBUNWIND_HIDDEN RWMutex { +public: + bool lock_shared() { return true; } + bool unlock_shared() { return true; } + bool lock() { return true; } + bool unlock() { return true; } +}; + +#elif defined(_WIN32) + +class _LIBUNWIND_HIDDEN RWMutex { +public: + bool lock_shared() { + AcquireSRWLockShared(&_lock); + return true; + } + bool unlock_shared() { + ReleaseSRWLockShared(&_lock); + return true; + } + bool lock() { + AcquireSRWLockExclusive(&_lock); + return true; + } + bool unlock() { + ReleaseSRWLockExclusive(&_lock); + return true; + } + +private: + SRWLOCK _lock = SRWLOCK_INIT; +}; + +#elif !defined(LIBUNWIND_USE_WEAK_PTHREAD) + +class _LIBUNWIND_HIDDEN RWMutex { +public: + bool lock_shared() { return pthread_rwlock_rdlock(&_lock) == 0; } + bool unlock_shared() { return pthread_rwlock_unlock(&_lock) == 0; } + bool lock() { return pthread_rwlock_wrlock(&_lock) == 0; } + bool unlock() { return pthread_rwlock_unlock(&_lock) == 0; } + +private: + pthread_rwlock_t _lock = PTHREAD_RWLOCK_INITIALIZER; +}; + +#else + +extern "C" int __attribute__((weak)) +pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg); +extern "C" int __attribute__((weak)) +pthread_rwlock_rdlock(pthread_rwlock_t *lock); +extern "C" int __attribute__((weak)) +pthread_rwlock_wrlock(pthread_rwlock_t *lock); +extern "C" int __attribute__((weak)) +pthread_rwlock_unlock(pthread_rwlock_t *lock); + +// Calls to the locking functions are gated on pthread_create, and not the +// functions themselves, because the data structure should only be locked if +// another thread has been created. This is what similar libraries do. + +class _LIBUNWIND_HIDDEN RWMutex { +public: + bool lock_shared() { + return !pthread_create || (pthread_rwlock_rdlock(&_lock) == 0); + } + bool unlock_shared() { + return !pthread_create || (pthread_rwlock_unlock(&_lock) == 0); + } + bool lock() { + return !pthread_create || (pthread_rwlock_wrlock(&_lock) == 0); + } + bool unlock() { + return !pthread_create || (pthread_rwlock_unlock(&_lock) == 0); + } + +private: + pthread_rwlock_t _lock = PTHREAD_RWLOCK_INITIALIZER; +}; + +#endif + +} // namespace libunwind + +#endif // __RWMUTEX_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/Registers.hpp b/src/coreclr/src/nativeaot/libunwind/src/Registers.hpp new file mode 100644 index 0000000000000..9f82d5c6766f5 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Registers.hpp @@ -0,0 +1,3718 @@ +//===----------------------------- Registers.hpp --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Models register sets for supported processors. 
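+// In this copy each register class additionally records the memory location
+// a register was restored from, exposed through getRegisterLocation().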
+// +//===----------------------------------------------------------------------===// + +#ifndef __REGISTERS_HPP__ +#define __REGISTERS_HPP__ + +#include +#include + +#include "libunwind.h" +#include "config.h" + +namespace libunwind { + +// For emulating 128-bit registers +struct v128 { uint32_t vec[4]; }; + +enum { + REGISTERS_X86, + REGISTERS_X86_64, + REGISTERS_PPC, + REGISTERS_PPC64, + REGISTERS_ARM64, + REGISTERS_ARM, + REGISTERS_OR1K, + REGISTERS_MIPS_O32, + REGISTERS_MIPS_NEWABI, + REGISTERS_SPARC, +}; + +#if defined(_LIBUNWIND_TARGET_I386) +/// Registers_x86 holds the register state of a thread in a 32-bit intel +/// process. +class _LIBUNWIND_HIDDEN Registers_x86 { +public: + Registers_x86(); + Registers_x86(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value, uint32_t location); + uint32_t getRegisterLocation(int num) const; + bool validFloatRegister(int) const { return false; } + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int) const { return false; } + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86; } + static int getArch() { return REGISTERS_X86; } + + uint32_t getSP() const { return _registers.__esp; } + void setSP(uint32_t value, uint32_t location) { _registers.__esp = value; _registerLocations.__esp = location; } + uint32_t getIP() const { return _registers.__eip; } + void setIP(uint32_t value, uint32_t location) { _registers.__eip = value; _registerLocations.__eip = location; } + uint32_t getEBP() const { return _registers.__ebp; } + void setEBP(uint32_t value, uint32_t location) { _registers.__ebp = value; _registerLocations.__ebp = location; } + uint32_t getEBX() const { return _registers.__ebx; } + void setEBX(uint32_t value, uint32_t location) { _registers.__ebx = value; _registerLocations.__ebx = location; } + uint32_t getECX() const { return _registers.__ecx; } + void setECX(uint32_t value, uint32_t location) { _registers.__ecx = value; _registerLocations.__ecx = location; } + uint32_t getEDX() const { return _registers.__edx; } + void setEDX(uint32_t value, uint32_t location) { _registers.__edx = value; _registerLocations.__edx = location; } + uint32_t getESI() const { return _registers.__esi; } + void setESI(uint32_t value, uint32_t location) { _registers.__esi = value; _registerLocations.__esi = location; } + uint32_t getEDI() const { return _registers.__edi; } + void setEDI(uint32_t value, uint32_t location) { _registers.__edi = value; _registerLocations.__edi = location; } + +private: + struct GPRs { + unsigned int __eax; + unsigned int __ebx; + unsigned int __ecx; + unsigned int __edx; + unsigned int __edi; + unsigned int __esi; + unsigned int __ebp; + unsigned int __esp; + unsigned int __ss; + unsigned int __eflags; + unsigned int __eip; + unsigned int __cs; + unsigned int __ds; + unsigned int __es; + unsigned int __fs; + unsigned int __gs; + }; + struct GPRLocations { + unsigned int __eax; + unsigned int __ebx; + unsigned int __ecx; + unsigned int __edx; + unsigned int __edi; + unsigned int __esi; + unsigned int __ebp; + unsigned int __esp; + unsigned int __eip; + }; + + GPRs _registers; + GPRLocations _registerLocations; +}; + +inline Registers_x86::Registers_x86(const void *registers) { + 
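+  // 'registers' points at the raw context block, typically captured by
+  // unw_getcontext().  The static_assert below checks that this class still
+  // fits inside the opaque unw_context_t buffer; saved-location tracking
+  // starts out cleared (a zero location means "not restored from memory").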
static_assert((check_fit::does_fit), + "x86 registers do not fit into unw_context_t"); + memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); +} + +inline Registers_x86::Registers_x86() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); +} + +inline bool Registers_x86::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 7) + return false; + return true; +} + +inline uint32_t Registers_x86::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registers.__eip; + case UNW_REG_SP: + return _registers.__esp; + case UNW_X86_EAX: + return _registers.__eax; + case UNW_X86_ECX: + return _registers.__ecx; + case UNW_X86_EDX: + return _registers.__edx; + case UNW_X86_EBX: + return _registers.__ebx; +#if !defined(__APPLE__) + case UNW_X86_ESP: +#else + case UNW_X86_EBP: +#endif + return _registers.__ebp; +#if !defined(__APPLE__) + case UNW_X86_EBP: +#else + case UNW_X86_ESP: +#endif + return _registers.__esp; + case UNW_X86_ESI: + return _registers.__esi; + case UNW_X86_EDI: + return _registers.__edi; + } + _LIBUNWIND_ABORT("unsupported x86 register"); +} + +inline void Registers_x86::setRegister(int regNum, uint32_t value, uint32_t location) { + switch (regNum) { + case UNW_REG_IP: + _registers.__eip = value; + _registerLocations.__eip = location; + return; + case UNW_REG_SP: + _registers.__esp = value; + _registerLocations.__esp = location; + return; + case UNW_X86_EAX: + _registers.__eax = value; + _registerLocations.__eax = location; + return; + case UNW_X86_ECX: + _registers.__ecx = value; + _registerLocations.__ecx = location; + return; + case UNW_X86_EDX: + _registers.__edx = value; + _registerLocations.__edx = location; + return; + case UNW_X86_EBX: + _registers.__ebx = value; + _registerLocations.__ebx = location; + return; +#if !defined(__APPLE__) + case UNW_X86_ESP: +#else + case UNW_X86_EBP: +#endif + _registers.__ebp = value; + _registerLocations.__ebp = location; + return; +#if !defined(__APPLE__) + case UNW_X86_EBP: +#else + case UNW_X86_ESP: +#endif + _registers.__esp = value; + _registerLocations.__esp = location; + return; + case UNW_X86_ESI: + _registers.__esi = value; + _registerLocations.__esi = location; + return; + case UNW_X86_EDI: + _registers.__edi = value; + _registerLocations.__edi = location; + return; + } + _LIBUNWIND_ABORT("unsupported x86 register"); +} + +inline uint32_t Registers_x86::getRegisterLocation(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registerLocations.__eip; + case UNW_REG_SP: + return _registerLocations.__esp; + case UNW_X86_EAX: + return _registerLocations.__eax; + case UNW_X86_ECX: + return _registerLocations.__ecx; + case UNW_X86_EDX: + return _registerLocations.__edx; + case UNW_X86_EBX: + return _registerLocations.__ebx; + case UNW_X86_EBP: + return _registerLocations.__ebp; + case UNW_X86_ESP: + return _registerLocations.__esp; + case UNW_X86_ESI: + return _registerLocations.__esi; + case UNW_X86_EDI: + return _registerLocations.__edi; + } + _LIBUNWIND_ABORT("unsupported x86 register"); +} + +inline const char *Registers_x86::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "ip"; + case UNW_REG_SP: + return "esp"; + case UNW_X86_EAX: + return "eax"; + case UNW_X86_ECX: + return "ecx"; + case UNW_X86_EDX: + return "edx"; + case 
UNW_X86_EBX: + return "ebx"; + case UNW_X86_EBP: + return "ebp"; + case UNW_X86_ESP: + return "esp"; + case UNW_X86_ESI: + return "esi"; + case UNW_X86_EDI: + return "edi"; + default: + return "unknown register"; + } +} + +inline double Registers_x86::getFloatRegister(int) const { + _LIBUNWIND_ABORT("no x86 float registers"); +} + +inline void Registers_x86::setFloatRegister(int, double) { + _LIBUNWIND_ABORT("no x86 float registers"); +} + +inline v128 Registers_x86::getVectorRegister(int) const { + _LIBUNWIND_ABORT("no x86 vector registers"); +} + +inline void Registers_x86::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("no x86 vector registers"); +} +#endif // _LIBUNWIND_TARGET_I386 + + +#if defined(_LIBUNWIND_TARGET_X86_64) +/// Registers_x86_64 holds the register state of a thread in a 64-bit intel +/// process. +class _LIBUNWIND_HIDDEN Registers_x86_64 { +public: + Registers_x86_64(); + Registers_x86_64(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + uint64_t getRegisterLocation(int num) const; + bool validFloatRegister(int) const { return false; } + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64; } + static int getArch() { return REGISTERS_X86_64; } + + uint64_t getSP() const { return _registers.__rsp; } + void setSP(uint64_t value, uint64_t location) { _registers.__rsp = value; _registerLocations.__rsp = location;} + uint64_t getIP() const { return _registers.__rip; } + void setIP(uint64_t value, uint64_t location) { _registers.__rip = value; _registerLocations.__rip = location; } + uint64_t getRBP() const { return _registers.__rbp; } + void setRBP(uint64_t value, uint64_t location) { _registers.__rbp = value; _registerLocations.__rbp = location; } + uint64_t getRBX() const { return _registers.__rbx; } + void setRBX(uint64_t value, uint64_t location) { _registers.__rbx = value; _registerLocations.__rbx = location; } + uint64_t getR12() const { return _registers.__r12; } + void setR12(uint64_t value, uint64_t location) { _registers.__r12 = value; _registerLocations.__r12 = location; } + uint64_t getR13() const { return _registers.__r13; } + void setR13(uint64_t value, uint64_t location) { _registers.__r13 = value; _registerLocations.__r13 = location; } + uint64_t getR14() const { return _registers.__r14; } + void setR14(uint64_t value, uint64_t location) { _registers.__r14 = value; _registerLocations.__r14 = location; } + uint64_t getR15() const { return _registers.__r15; } + void setR15(uint64_t value, uint64_t location) { _registers.__r15 = value; _registerLocations.__r15 = location; } + +private: + struct GPRs { + uint64_t __rax; + uint64_t __rbx; + uint64_t __rcx; + uint64_t __rdx; + uint64_t __rdi; + uint64_t __rsi; + uint64_t __rbp; + uint64_t __rsp; + uint64_t __r8; + uint64_t __r9; + uint64_t __r10; + uint64_t __r11; + uint64_t __r12; + uint64_t __r13; + uint64_t __r14; + uint64_t __r15; + uint64_t __rip; + uint64_t __rflags; + uint64_t __cs; + uint64_t __fs; + uint64_t __gs; +#if defined(_WIN64) + uint64_t __padding; // 16-byte align +#endif + }; + struct GPRLocations { + uint64_t __rax; + uint64_t __rbx; + uint64_t __rcx; + uint64_t __rdx; + 
uint64_t __rdi; + uint64_t __rsi; + uint64_t __rbp; + uint64_t __rsp; + uint64_t __r8; + uint64_t __r9; + uint64_t __r10; + uint64_t __r11; + uint64_t __r12; + uint64_t __r13; + uint64_t __r14; + uint64_t __r15; + uint64_t __rip; + }; + GPRs _registers; + GPRLocations _registerLocations; +#if defined(_WIN64) + v128 _xmm[16]; +#endif +}; + +inline Registers_x86_64::Registers_x86_64(const void *registers) { + static_assert((check_fit::does_fit), + "x86_64 registers do not fit into unw_context_t"); + memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); +} + +inline Registers_x86_64::Registers_x86_64() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); +} + +inline bool Registers_x86_64::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 15) + return false; + return true; +} + +inline uint64_t Registers_x86_64::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registers.__rip; + case UNW_REG_SP: + return _registers.__rsp; + case UNW_X86_64_RAX: + return _registers.__rax; + case UNW_X86_64_RDX: + return _registers.__rdx; + case UNW_X86_64_RCX: + return _registers.__rcx; + case UNW_X86_64_RBX: + return _registers.__rbx; + case UNW_X86_64_RSI: + return _registers.__rsi; + case UNW_X86_64_RDI: + return _registers.__rdi; + case UNW_X86_64_RBP: + return _registers.__rbp; + case UNW_X86_64_RSP: + return _registers.__rsp; + case UNW_X86_64_R8: + return _registers.__r8; + case UNW_X86_64_R9: + return _registers.__r9; + case UNW_X86_64_R10: + return _registers.__r10; + case UNW_X86_64_R11: + return _registers.__r11; + case UNW_X86_64_R12: + return _registers.__r12; + case UNW_X86_64_R13: + return _registers.__r13; + case UNW_X86_64_R14: + return _registers.__r14; + case UNW_X86_64_R15: + return _registers.__r15; + } + _LIBUNWIND_ABORT("unsupported x86_64 register"); +} + +inline uint64_t Registers_x86_64::getRegisterLocation(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registerLocations.__rip; + case UNW_REG_SP: + return _registerLocations.__rsp; + case UNW_X86_64_RAX: + return _registerLocations.__rax; + case UNW_X86_64_RDX: + return _registerLocations.__rdx; + case UNW_X86_64_RCX: + return _registerLocations.__rcx; + case UNW_X86_64_RBX: + return _registerLocations.__rbx; + case UNW_X86_64_RSI: + return _registerLocations.__rsi; + case UNW_X86_64_RDI: + return _registerLocations.__rdi; + case UNW_X86_64_RBP: + return _registerLocations.__rbp; + case UNW_X86_64_RSP: + return _registerLocations.__rsp; + case UNW_X86_64_R8: + return _registerLocations.__r8; + case UNW_X86_64_R9: + return _registerLocations.__r9; + case UNW_X86_64_R10: + return _registerLocations.__r10; + case UNW_X86_64_R11: + return _registerLocations.__r11; + case UNW_X86_64_R12: + return _registerLocations.__r12; + case UNW_X86_64_R13: + return _registerLocations.__r13; + case UNW_X86_64_R14: + return _registerLocations.__r14; + case UNW_X86_64_R15: + return _registerLocations.__r15; + } + _LIBUNWIND_ABORT("unsupported x86_64 register"); +} + +inline void Registers_x86_64::setRegister(int regNum, uint64_t value, uint64_t location) { + switch (regNum) { + case UNW_REG_IP: + _registers.__rip = value; + _registerLocations.__rip = location; + return; + case UNW_REG_SP: + _registers.__rsp = value; + _registerLocations.__rsp = location; + return; + case 
UNW_X86_64_RAX: + _registers.__rax = value; + _registerLocations.__rax = location; + return; + case UNW_X86_64_RDX: + _registers.__rdx = value; + _registerLocations.__rdx = location; + return; + case UNW_X86_64_RCX: + _registers.__rcx = value; + _registerLocations.__rcx = location; + return; + case UNW_X86_64_RBX: + _registers.__rbx = value; + _registerLocations.__rbx = location; + return; + case UNW_X86_64_RSI: + _registers.__rsi = value; + _registerLocations.__rsi = location; + return; + case UNW_X86_64_RDI: + _registers.__rdi = value; + _registerLocations.__rdi = location; + return; + case UNW_X86_64_RBP: + _registers.__rbp = value; + _registerLocations.__rbp = location; + return; + case UNW_X86_64_RSP: + _registers.__rsp = value; + _registerLocations.__rsp = location; + return; + case UNW_X86_64_R8: + _registers.__r8 = value; + _registerLocations.__r8 = location; + return; + case UNW_X86_64_R9: + _registers.__r9 = value; + _registerLocations.__r9 = location; + return; + case UNW_X86_64_R10: + _registers.__r10 = value; + _registerLocations.__r10 = location; + return; + case UNW_X86_64_R11: + _registers.__r11 = value; + _registerLocations.__r11 = location; + return; + case UNW_X86_64_R12: + _registers.__r12 = value; + _registerLocations.__r12 = location; + return; + case UNW_X86_64_R13: + _registers.__r13 = value; + _registerLocations.__r13 = location; + return; + case UNW_X86_64_R14: + _registers.__r14 = value; + _registerLocations.__r14 = location; + return; + case UNW_X86_64_R15: + _registers.__r15 = value; + _registerLocations.__r15 = location; + return; + } + _LIBUNWIND_ABORT("unsupported x86_64 register"); +} + +inline const char *Registers_x86_64::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "rip"; + case UNW_REG_SP: + return "rsp"; + case UNW_X86_64_RAX: + return "rax"; + case UNW_X86_64_RDX: + return "rdx"; + case UNW_X86_64_RCX: + return "rcx"; + case UNW_X86_64_RBX: + return "rbx"; + case UNW_X86_64_RSI: + return "rsi"; + case UNW_X86_64_RDI: + return "rdi"; + case UNW_X86_64_RBP: + return "rbp"; + case UNW_X86_64_RSP: + return "rsp"; + case UNW_X86_64_R8: + return "r8"; + case UNW_X86_64_R9: + return "r9"; + case UNW_X86_64_R10: + return "r10"; + case UNW_X86_64_R11: + return "r11"; + case UNW_X86_64_R12: + return "r12"; + case UNW_X86_64_R13: + return "r13"; + case UNW_X86_64_R14: + return "r14"; + case UNW_X86_64_R15: + return "r15"; + case UNW_X86_64_XMM0: + return "xmm0"; + case UNW_X86_64_XMM1: + return "xmm1"; + case UNW_X86_64_XMM2: + return "xmm2"; + case UNW_X86_64_XMM3: + return "xmm3"; + case UNW_X86_64_XMM4: + return "xmm4"; + case UNW_X86_64_XMM5: + return "xmm5"; + case UNW_X86_64_XMM6: + return "xmm6"; + case UNW_X86_64_XMM7: + return "xmm7"; + case UNW_X86_64_XMM8: + return "xmm8"; + case UNW_X86_64_XMM9: + return "xmm9"; + case UNW_X86_64_XMM10: + return "xmm10"; + case UNW_X86_64_XMM11: + return "xmm11"; + case UNW_X86_64_XMM12: + return "xmm12"; + case UNW_X86_64_XMM13: + return "xmm13"; + case UNW_X86_64_XMM14: + return "xmm14"; + case UNW_X86_64_XMM15: + return "xmm15"; + default: + return "unknown register"; + } +} + +inline double Registers_x86_64::getFloatRegister(int) const { + _LIBUNWIND_ABORT("no x86_64 float registers"); +} + +inline void Registers_x86_64::setFloatRegister(int, double) { + _LIBUNWIND_ABORT("no x86_64 float registers"); +} + +inline bool Registers_x86_64::validVectorRegister(int regNum) const { +#if defined(_WIN64) + if (regNum < UNW_X86_64_XMM0) + return false; + if (regNum > UNW_X86_64_XMM15) + 
return false; + return true; +#else + (void)regNum; // suppress unused parameter warning + return false; +#endif +} + +inline v128 Registers_x86_64::getVectorRegister(int regNum) const { +#if defined(_WIN64) + assert(validVectorRegister(regNum)); + return _xmm[regNum - UNW_X86_64_XMM0]; +#else + (void)regNum; // suppress unused parameter warning + _LIBUNWIND_ABORT("no x86_64 vector registers"); +#endif +} + +inline void Registers_x86_64::setVectorRegister(int regNum, v128 value) { +#if defined(_WIN64) + assert(validVectorRegister(regNum)); + _xmm[regNum - UNW_X86_64_XMM0] = value; +#else + (void)regNum; (void)value; // suppress unused parameter warnings + _LIBUNWIND_ABORT("no x86_64 vector registers"); +#endif +} +#endif // _LIBUNWIND_TARGET_X86_64 + + +#if defined(_LIBUNWIND_TARGET_PPC) +/// Registers_ppc holds the register state of a thread in a 32-bit PowerPC +/// process. +class _LIBUNWIND_HIDDEN Registers_ppc { +public: + Registers_ppc(); + Registers_ppc(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC; } + static int getArch() { return REGISTERS_PPC; } + + uint64_t getSP() const { return _registers.__r1; } + void setSP(uint32_t value) { _registers.__r1 = value; } + uint64_t getIP() const { return _registers.__srr0; } + void setIP(uint32_t value) { _registers.__srr0 = value; } + +private: + struct ppc_thread_state_t { + unsigned int __srr0; /* Instruction address register (PC) */ + unsigned int __srr1; /* Machine state register (supervisor) */ + unsigned int __r0; + unsigned int __r1; + unsigned int __r2; + unsigned int __r3; + unsigned int __r4; + unsigned int __r5; + unsigned int __r6; + unsigned int __r7; + unsigned int __r8; + unsigned int __r9; + unsigned int __r10; + unsigned int __r11; + unsigned int __r12; + unsigned int __r13; + unsigned int __r14; + unsigned int __r15; + unsigned int __r16; + unsigned int __r17; + unsigned int __r18; + unsigned int __r19; + unsigned int __r20; + unsigned int __r21; + unsigned int __r22; + unsigned int __r23; + unsigned int __r24; + unsigned int __r25; + unsigned int __r26; + unsigned int __r27; + unsigned int __r28; + unsigned int __r29; + unsigned int __r30; + unsigned int __r31; + unsigned int __cr; /* Condition register */ + unsigned int __xer; /* User's integer exception register */ + unsigned int __lr; /* Link register */ + unsigned int __ctr; /* Count register */ + unsigned int __mq; /* MQ register (601 only) */ + unsigned int __vrsave; /* Vector Save Register */ + }; + + struct ppc_float_state_t { + double __fpregs[32]; + + unsigned int __fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */ + unsigned int __fpscr; /* floating point status register */ + }; + + ppc_thread_state_t _registers; + ppc_float_state_t _floatRegisters; + v128 _vectorRegisters[32]; // offset 424 +}; + +inline Registers_ppc::Registers_ppc(const void *registers) { + static_assert((check_fit::does_fit), + "ppc registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); + static_assert(sizeof(ppc_thread_state_t) == 160, 
+ "expected float register offset to be 160"); + memcpy(&_floatRegisters, + static_cast(registers) + sizeof(ppc_thread_state_t), + sizeof(_floatRegisters)); + static_assert(sizeof(ppc_thread_state_t) + sizeof(ppc_float_state_t) == 424, + "expected vector register offset to be 424 bytes"); + memcpy(_vectorRegisters, + static_cast(registers) + sizeof(ppc_thread_state_t) + + sizeof(ppc_float_state_t), + sizeof(_vectorRegisters)); +} + +inline Registers_ppc::Registers_ppc() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_floatRegisters, 0, sizeof(_floatRegisters)); + memset(&_vectorRegisters, 0, sizeof(_vectorRegisters)); +} + +inline bool Registers_ppc::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum == UNW_PPC_VRSAVE) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_PPC_R31) + return true; + if (regNum == UNW_PPC_MQ) + return true; + if (regNum == UNW_PPC_LR) + return true; + if (regNum == UNW_PPC_CTR) + return true; + if ((UNW_PPC_CR0 <= regNum) && (regNum <= UNW_PPC_CR7)) + return true; + return false; +} + +inline uint32_t Registers_ppc::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registers.__srr0; + case UNW_REG_SP: + return _registers.__r1; + case UNW_PPC_R0: + return _registers.__r0; + case UNW_PPC_R1: + return _registers.__r1; + case UNW_PPC_R2: + return _registers.__r2; + case UNW_PPC_R3: + return _registers.__r3; + case UNW_PPC_R4: + return _registers.__r4; + case UNW_PPC_R5: + return _registers.__r5; + case UNW_PPC_R6: + return _registers.__r6; + case UNW_PPC_R7: + return _registers.__r7; + case UNW_PPC_R8: + return _registers.__r8; + case UNW_PPC_R9: + return _registers.__r9; + case UNW_PPC_R10: + return _registers.__r10; + case UNW_PPC_R11: + return _registers.__r11; + case UNW_PPC_R12: + return _registers.__r12; + case UNW_PPC_R13: + return _registers.__r13; + case UNW_PPC_R14: + return _registers.__r14; + case UNW_PPC_R15: + return _registers.__r15; + case UNW_PPC_R16: + return _registers.__r16; + case UNW_PPC_R17: + return _registers.__r17; + case UNW_PPC_R18: + return _registers.__r18; + case UNW_PPC_R19: + return _registers.__r19; + case UNW_PPC_R20: + return _registers.__r20; + case UNW_PPC_R21: + return _registers.__r21; + case UNW_PPC_R22: + return _registers.__r22; + case UNW_PPC_R23: + return _registers.__r23; + case UNW_PPC_R24: + return _registers.__r24; + case UNW_PPC_R25: + return _registers.__r25; + case UNW_PPC_R26: + return _registers.__r26; + case UNW_PPC_R27: + return _registers.__r27; + case UNW_PPC_R28: + return _registers.__r28; + case UNW_PPC_R29: + return _registers.__r29; + case UNW_PPC_R30: + return _registers.__r30; + case UNW_PPC_R31: + return _registers.__r31; + case UNW_PPC_LR: + return _registers.__lr; + case UNW_PPC_CR0: + return (_registers.__cr & 0xF0000000); + case UNW_PPC_CR1: + return (_registers.__cr & 0x0F000000); + case UNW_PPC_CR2: + return (_registers.__cr & 0x00F00000); + case UNW_PPC_CR3: + return (_registers.__cr & 0x000F0000); + case UNW_PPC_CR4: + return (_registers.__cr & 0x0000F000); + case UNW_PPC_CR5: + return (_registers.__cr & 0x00000F00); + case UNW_PPC_CR6: + return (_registers.__cr & 0x000000F0); + case UNW_PPC_CR7: + return (_registers.__cr & 0x0000000F); + case UNW_PPC_VRSAVE: + return _registers.__vrsave; + } + _LIBUNWIND_ABORT("unsupported ppc register"); +} + +inline void Registers_ppc::setRegister(int regNum, uint32_t value) { + //fprintf(stderr, 
"Registers_ppc::setRegister(%d, 0x%08X)\n", regNum, value); + switch (regNum) { + case UNW_REG_IP: + _registers.__srr0 = value; + return; + case UNW_REG_SP: + _registers.__r1 = value; + return; + case UNW_PPC_R0: + _registers.__r0 = value; + return; + case UNW_PPC_R1: + _registers.__r1 = value; + return; + case UNW_PPC_R2: + _registers.__r2 = value; + return; + case UNW_PPC_R3: + _registers.__r3 = value; + return; + case UNW_PPC_R4: + _registers.__r4 = value; + return; + case UNW_PPC_R5: + _registers.__r5 = value; + return; + case UNW_PPC_R6: + _registers.__r6 = value; + return; + case UNW_PPC_R7: + _registers.__r7 = value; + return; + case UNW_PPC_R8: + _registers.__r8 = value; + return; + case UNW_PPC_R9: + _registers.__r9 = value; + return; + case UNW_PPC_R10: + _registers.__r10 = value; + return; + case UNW_PPC_R11: + _registers.__r11 = value; + return; + case UNW_PPC_R12: + _registers.__r12 = value; + return; + case UNW_PPC_R13: + _registers.__r13 = value; + return; + case UNW_PPC_R14: + _registers.__r14 = value; + return; + case UNW_PPC_R15: + _registers.__r15 = value; + return; + case UNW_PPC_R16: + _registers.__r16 = value; + return; + case UNW_PPC_R17: + _registers.__r17 = value; + return; + case UNW_PPC_R18: + _registers.__r18 = value; + return; + case UNW_PPC_R19: + _registers.__r19 = value; + return; + case UNW_PPC_R20: + _registers.__r20 = value; + return; + case UNW_PPC_R21: + _registers.__r21 = value; + return; + case UNW_PPC_R22: + _registers.__r22 = value; + return; + case UNW_PPC_R23: + _registers.__r23 = value; + return; + case UNW_PPC_R24: + _registers.__r24 = value; + return; + case UNW_PPC_R25: + _registers.__r25 = value; + return; + case UNW_PPC_R26: + _registers.__r26 = value; + return; + case UNW_PPC_R27: + _registers.__r27 = value; + return; + case UNW_PPC_R28: + _registers.__r28 = value; + return; + case UNW_PPC_R29: + _registers.__r29 = value; + return; + case UNW_PPC_R30: + _registers.__r30 = value; + return; + case UNW_PPC_R31: + _registers.__r31 = value; + return; + case UNW_PPC_MQ: + _registers.__mq = value; + return; + case UNW_PPC_LR: + _registers.__lr = value; + return; + case UNW_PPC_CTR: + _registers.__ctr = value; + return; + case UNW_PPC_CR0: + _registers.__cr &= 0x0FFFFFFF; + _registers.__cr |= (value & 0xF0000000); + return; + case UNW_PPC_CR1: + _registers.__cr &= 0xF0FFFFFF; + _registers.__cr |= (value & 0x0F000000); + return; + case UNW_PPC_CR2: + _registers.__cr &= 0xFF0FFFFF; + _registers.__cr |= (value & 0x00F00000); + return; + case UNW_PPC_CR3: + _registers.__cr &= 0xFFF0FFFF; + _registers.__cr |= (value & 0x000F0000); + return; + case UNW_PPC_CR4: + _registers.__cr &= 0xFFFF0FFF; + _registers.__cr |= (value & 0x0000F000); + return; + case UNW_PPC_CR5: + _registers.__cr &= 0xFFFFF0FF; + _registers.__cr |= (value & 0x00000F00); + return; + case UNW_PPC_CR6: + _registers.__cr &= 0xFFFFFF0F; + _registers.__cr |= (value & 0x000000F0); + return; + case UNW_PPC_CR7: + _registers.__cr &= 0xFFFFFFF0; + _registers.__cr |= (value & 0x0000000F); + return; + case UNW_PPC_VRSAVE: + _registers.__vrsave = value; + return; + // not saved + return; + case UNW_PPC_XER: + _registers.__xer = value; + return; + case UNW_PPC_AP: + case UNW_PPC_VSCR: + case UNW_PPC_SPEFSCR: + // not saved + return; + } + _LIBUNWIND_ABORT("unsupported ppc register"); +} + +inline bool Registers_ppc::validFloatRegister(int regNum) const { + if (regNum < UNW_PPC_F0) + return false; + if (regNum > UNW_PPC_F31) + return false; + return true; +} + +inline double 
Registers_ppc::getFloatRegister(int regNum) const { + assert(validFloatRegister(regNum)); + return _floatRegisters.__fpregs[regNum - UNW_PPC_F0]; +} + +inline void Registers_ppc::setFloatRegister(int regNum, double value) { + assert(validFloatRegister(regNum)); + _floatRegisters.__fpregs[regNum - UNW_PPC_F0] = value; +} + +inline bool Registers_ppc::validVectorRegister(int regNum) const { + if (regNum < UNW_PPC_V0) + return false; + if (regNum > UNW_PPC_V31) + return false; + return true; +} + +inline v128 Registers_ppc::getVectorRegister(int regNum) const { + assert(validVectorRegister(regNum)); + v128 result = _vectorRegisters[regNum - UNW_PPC_V0]; + return result; +} + +inline void Registers_ppc::setVectorRegister(int regNum, v128 value) { + assert(validVectorRegister(regNum)); + _vectorRegisters[regNum - UNW_PPC_V0] = value; +} + +inline const char *Registers_ppc::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "ip"; + case UNW_REG_SP: + return "sp"; + case UNW_PPC_R0: + return "r0"; + case UNW_PPC_R1: + return "r1"; + case UNW_PPC_R2: + return "r2"; + case UNW_PPC_R3: + return "r3"; + case UNW_PPC_R4: + return "r4"; + case UNW_PPC_R5: + return "r5"; + case UNW_PPC_R6: + return "r6"; + case UNW_PPC_R7: + return "r7"; + case UNW_PPC_R8: + return "r8"; + case UNW_PPC_R9: + return "r9"; + case UNW_PPC_R10: + return "r10"; + case UNW_PPC_R11: + return "r11"; + case UNW_PPC_R12: + return "r12"; + case UNW_PPC_R13: + return "r13"; + case UNW_PPC_R14: + return "r14"; + case UNW_PPC_R15: + return "r15"; + case UNW_PPC_R16: + return "r16"; + case UNW_PPC_R17: + return "r17"; + case UNW_PPC_R18: + return "r18"; + case UNW_PPC_R19: + return "r19"; + case UNW_PPC_R20: + return "r20"; + case UNW_PPC_R21: + return "r21"; + case UNW_PPC_R22: + return "r22"; + case UNW_PPC_R23: + return "r23"; + case UNW_PPC_R24: + return "r24"; + case UNW_PPC_R25: + return "r25"; + case UNW_PPC_R26: + return "r26"; + case UNW_PPC_R27: + return "r27"; + case UNW_PPC_R28: + return "r28"; + case UNW_PPC_R29: + return "r29"; + case UNW_PPC_R30: + return "r30"; + case UNW_PPC_R31: + return "r31"; + case UNW_PPC_F0: + return "fp0"; + case UNW_PPC_F1: + return "fp1"; + case UNW_PPC_F2: + return "fp2"; + case UNW_PPC_F3: + return "fp3"; + case UNW_PPC_F4: + return "fp4"; + case UNW_PPC_F5: + return "fp5"; + case UNW_PPC_F6: + return "fp6"; + case UNW_PPC_F7: + return "fp7"; + case UNW_PPC_F8: + return "fp8"; + case UNW_PPC_F9: + return "fp9"; + case UNW_PPC_F10: + return "fp10"; + case UNW_PPC_F11: + return "fp11"; + case UNW_PPC_F12: + return "fp12"; + case UNW_PPC_F13: + return "fp13"; + case UNW_PPC_F14: + return "fp14"; + case UNW_PPC_F15: + return "fp15"; + case UNW_PPC_F16: + return "fp16"; + case UNW_PPC_F17: + return "fp17"; + case UNW_PPC_F18: + return "fp18"; + case UNW_PPC_F19: + return "fp19"; + case UNW_PPC_F20: + return "fp20"; + case UNW_PPC_F21: + return "fp21"; + case UNW_PPC_F22: + return "fp22"; + case UNW_PPC_F23: + return "fp23"; + case UNW_PPC_F24: + return "fp24"; + case UNW_PPC_F25: + return "fp25"; + case UNW_PPC_F26: + return "fp26"; + case UNW_PPC_F27: + return "fp27"; + case UNW_PPC_F28: + return "fp28"; + case UNW_PPC_F29: + return "fp29"; + case UNW_PPC_F30: + return "fp30"; + case UNW_PPC_F31: + return "fp31"; + case UNW_PPC_LR: + return "lr"; + default: + return "unknown register"; + } + +} +#endif // _LIBUNWIND_TARGET_PPC + +#if defined(_LIBUNWIND_TARGET_PPC64) +/// Registers_ppc64 holds the register state of a thread in a 64-bit PowerPC +/// process. 
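+/// Floating-point and vector state is kept in the combined VSX register file
+/// (_vectorScalarRegisters), reflecting how POWER overlays the FP and
+/// Altivec registers onto the 64 VSX registers.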
+class _LIBUNWIND_HIDDEN Registers_ppc64 { +public: + Registers_ppc64(); + Registers_ppc64(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + uint64_t getRegisterLocation(int num) const; + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC64; } + static int getArch() { return REGISTERS_PPC64; } + + uint64_t getSP() const { return _registers.__r1; } + void setSP(uint64_t value) { _registers.__r1 = value; } + uint64_t getIP() const { return _registers.__srr0; } + void setIP(uint64_t value) { _registers.__srr0 = value; } + +private: + struct ppc64_thread_state_t { + uint64_t __srr0; // Instruction address register (PC) + uint64_t __srr1; // Machine state register (supervisor) + uint64_t __r0; + uint64_t __r1; + uint64_t __r2; + uint64_t __r3; + uint64_t __r4; + uint64_t __r5; + uint64_t __r6; + uint64_t __r7; + uint64_t __r8; + uint64_t __r9; + uint64_t __r10; + uint64_t __r11; + uint64_t __r12; + uint64_t __r13; + uint64_t __r14; + uint64_t __r15; + uint64_t __r16; + uint64_t __r17; + uint64_t __r18; + uint64_t __r19; + uint64_t __r20; + uint64_t __r21; + uint64_t __r22; + uint64_t __r23; + uint64_t __r24; + uint64_t __r25; + uint64_t __r26; + uint64_t __r27; + uint64_t __r28; + uint64_t __r29; + uint64_t __r30; + uint64_t __r31; + uint64_t __cr; // Condition register + uint64_t __xer; // User's integer exception register + uint64_t __lr; // Link register + uint64_t __ctr; // Count register + uint64_t __vrsave; // Vector Save Register + }; + + union ppc64_vsr_t { + struct asfloat_s { + double f; + uint64_t v2; + } asfloat; + v128 v; + }; + + ppc64_thread_state_t _registers; + ppc64_vsr_t _vectorScalarRegisters[64]; + + static int getVectorRegNum(int num); +}; + +inline Registers_ppc64::Registers_ppc64(const void *registers) { + static_assert((check_fit::does_fit), + "ppc64 registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); + static_assert(sizeof(_registers) == 312, + "expected vector scalar register offset to be 312"); + memcpy(&_vectorScalarRegisters, + static_cast(registers) + sizeof(_registers), + sizeof(_vectorScalarRegisters)); + static_assert(sizeof(_registers) + + sizeof(_vectorScalarRegisters) == 1336, + "expected vector register offset to be 1336 bytes"); +} + +inline Registers_ppc64::Registers_ppc64() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_vectorScalarRegisters, 0, sizeof(_vectorScalarRegisters)); +} + +inline bool Registers_ppc64::validRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + case UNW_REG_SP: + case UNW_PPC64_XER: + case UNW_PPC64_LR: + case UNW_PPC64_CTR: + case UNW_PPC64_VRSAVE: + return true; + } + + if (regNum >= UNW_PPC64_R0 && regNum <= UNW_PPC64_R31) + return true; + if (regNum >= UNW_PPC64_CR0 && regNum <= UNW_PPC64_CR7) + return true; + + return false; +} + +inline uint64_t Registers_ppc64::getRegister(int regNum) const { + switch (regNum) { + case UNW_REG_IP: + return _registers.__srr0; + case UNW_PPC64_R0: + return _registers.__r0; + case UNW_PPC64_R1: + case UNW_REG_SP: + return _registers.__r1; + 
case UNW_PPC64_R2: + return _registers.__r2; + case UNW_PPC64_R3: + return _registers.__r3; + case UNW_PPC64_R4: + return _registers.__r4; + case UNW_PPC64_R5: + return _registers.__r5; + case UNW_PPC64_R6: + return _registers.__r6; + case UNW_PPC64_R7: + return _registers.__r7; + case UNW_PPC64_R8: + return _registers.__r8; + case UNW_PPC64_R9: + return _registers.__r9; + case UNW_PPC64_R10: + return _registers.__r10; + case UNW_PPC64_R11: + return _registers.__r11; + case UNW_PPC64_R12: + return _registers.__r12; + case UNW_PPC64_R13: + return _registers.__r13; + case UNW_PPC64_R14: + return _registers.__r14; + case UNW_PPC64_R15: + return _registers.__r15; + case UNW_PPC64_R16: + return _registers.__r16; + case UNW_PPC64_R17: + return _registers.__r17; + case UNW_PPC64_R18: + return _registers.__r18; + case UNW_PPC64_R19: + return _registers.__r19; + case UNW_PPC64_R20: + return _registers.__r20; + case UNW_PPC64_R21: + return _registers.__r21; + case UNW_PPC64_R22: + return _registers.__r22; + case UNW_PPC64_R23: + return _registers.__r23; + case UNW_PPC64_R24: + return _registers.__r24; + case UNW_PPC64_R25: + return _registers.__r25; + case UNW_PPC64_R26: + return _registers.__r26; + case UNW_PPC64_R27: + return _registers.__r27; + case UNW_PPC64_R28: + return _registers.__r28; + case UNW_PPC64_R29: + return _registers.__r29; + case UNW_PPC64_R30: + return _registers.__r30; + case UNW_PPC64_R31: + return _registers.__r31; + case UNW_PPC64_CR0: + return (_registers.__cr & 0xF0000000); + case UNW_PPC64_CR1: + return (_registers.__cr & 0x0F000000); + case UNW_PPC64_CR2: + return (_registers.__cr & 0x00F00000); + case UNW_PPC64_CR3: + return (_registers.__cr & 0x000F0000); + case UNW_PPC64_CR4: + return (_registers.__cr & 0x0000F000); + case UNW_PPC64_CR5: + return (_registers.__cr & 0x00000F00); + case UNW_PPC64_CR6: + return (_registers.__cr & 0x000000F0); + case UNW_PPC64_CR7: + return (_registers.__cr & 0x0000000F); + case UNW_PPC64_XER: + return _registers.__xer; + case UNW_PPC64_LR: + return _registers.__lr; + case UNW_PPC64_CTR: + return _registers.__ctr; + case UNW_PPC64_VRSAVE: + return _registers.__vrsave; + } + _LIBUNWIND_ABORT("unsupported ppc64 register"); +} + +inline void Registers_ppc64::setRegister(int regNum, uint64_t value) { + switch (regNum) { + case UNW_REG_IP: + _registers.__srr0 = value; + return; + case UNW_PPC64_R0: + _registers.__r0 = value; + return; + case UNW_PPC64_R1: + case UNW_REG_SP: + _registers.__r1 = value; + return; + case UNW_PPC64_R2: + _registers.__r2 = value; + return; + case UNW_PPC64_R3: + _registers.__r3 = value; + return; + case UNW_PPC64_R4: + _registers.__r4 = value; + return; + case UNW_PPC64_R5: + _registers.__r5 = value; + return; + case UNW_PPC64_R6: + _registers.__r6 = value; + return; + case UNW_PPC64_R7: + _registers.__r7 = value; + return; + case UNW_PPC64_R8: + _registers.__r8 = value; + return; + case UNW_PPC64_R9: + _registers.__r9 = value; + return; + case UNW_PPC64_R10: + _registers.__r10 = value; + return; + case UNW_PPC64_R11: + _registers.__r11 = value; + return; + case UNW_PPC64_R12: + _registers.__r12 = value; + return; + case UNW_PPC64_R13: + _registers.__r13 = value; + return; + case UNW_PPC64_R14: + _registers.__r14 = value; + return; + case UNW_PPC64_R15: + _registers.__r15 = value; + return; + case UNW_PPC64_R16: + _registers.__r16 = value; + return; + case UNW_PPC64_R17: + _registers.__r17 = value; + return; + case UNW_PPC64_R18: + _registers.__r18 = value; + return; + case UNW_PPC64_R19: + _registers.__r19 = value; 
+ return; + case UNW_PPC64_R20: + _registers.__r20 = value; + return; + case UNW_PPC64_R21: + _registers.__r21 = value; + return; + case UNW_PPC64_R22: + _registers.__r22 = value; + return; + case UNW_PPC64_R23: + _registers.__r23 = value; + return; + case UNW_PPC64_R24: + _registers.__r24 = value; + return; + case UNW_PPC64_R25: + _registers.__r25 = value; + return; + case UNW_PPC64_R26: + _registers.__r26 = value; + return; + case UNW_PPC64_R27: + _registers.__r27 = value; + return; + case UNW_PPC64_R28: + _registers.__r28 = value; + return; + case UNW_PPC64_R29: + _registers.__r29 = value; + return; + case UNW_PPC64_R30: + _registers.__r30 = value; + return; + case UNW_PPC64_R31: + _registers.__r31 = value; + return; + case UNW_PPC64_CR0: + _registers.__cr &= 0x0FFFFFFF; + _registers.__cr |= (value & 0xF0000000); + return; + case UNW_PPC64_CR1: + _registers.__cr &= 0xF0FFFFFF; + _registers.__cr |= (value & 0x0F000000); + return; + case UNW_PPC64_CR2: + _registers.__cr &= 0xFF0FFFFF; + _registers.__cr |= (value & 0x00F00000); + return; + case UNW_PPC64_CR3: + _registers.__cr &= 0xFFF0FFFF; + _registers.__cr |= (value & 0x000F0000); + return; + case UNW_PPC64_CR4: + _registers.__cr &= 0xFFFF0FFF; + _registers.__cr |= (value & 0x0000F000); + return; + case UNW_PPC64_CR5: + _registers.__cr &= 0xFFFFF0FF; + _registers.__cr |= (value & 0x00000F00); + return; + case UNW_PPC64_CR6: + _registers.__cr &= 0xFFFFFF0F; + _registers.__cr |= (value & 0x000000F0); + return; + case UNW_PPC64_CR7: + _registers.__cr &= 0xFFFFFFF0; + _registers.__cr |= (value & 0x0000000F); + return; + case UNW_PPC64_XER: + _registers.__xer = value; + return; + case UNW_PPC64_LR: + _registers.__lr = value; + return; + case UNW_PPC64_CTR: + _registers.__ctr = value; + return; + case UNW_PPC64_VRSAVE: + _registers.__vrsave = value; + return; + } + _LIBUNWIND_ABORT("unsupported ppc64 register"); +} + +inline bool Registers_ppc64::validFloatRegister(int regNum) const { + return regNum >= UNW_PPC64_F0 && regNum <= UNW_PPC64_F31; +} + +inline double Registers_ppc64::getFloatRegister(int regNum) const { + assert(validFloatRegister(regNum)); + return _vectorScalarRegisters[regNum - UNW_PPC64_F0].asfloat.f; +} + +inline void Registers_ppc64::setFloatRegister(int regNum, double value) { + assert(validFloatRegister(regNum)); + _vectorScalarRegisters[regNum - UNW_PPC64_F0].asfloat.f = value; +} + +inline bool Registers_ppc64::validVectorRegister(int regNum) const { +#ifdef PPC64_HAS_VMX + if (regNum >= UNW_PPC64_VS0 && regNum <= UNW_PPC64_VS31) + return true; + if (regNum >= UNW_PPC64_VS32 && regNum <= UNW_PPC64_VS63) + return true; +#else + if (regNum >= UNW_PPC64_V0 && regNum <= UNW_PPC64_V31) + return true; +#endif + return false; +} + +inline int Registers_ppc64::getVectorRegNum(int num) +{ + if (num >= UNW_PPC64_VS0 && num <= UNW_PPC64_VS31) + return num - UNW_PPC64_VS0; + else + return num - UNW_PPC64_VS32 + 32; +} + +inline v128 Registers_ppc64::getVectorRegister(int regNum) const { + assert(validVectorRegister(regNum)); + return _vectorScalarRegisters[getVectorRegNum(regNum)].v; +} + +inline void Registers_ppc64::setVectorRegister(int regNum, v128 value) { + assert(validVectorRegister(regNum)); + _vectorScalarRegisters[getVectorRegNum(regNum)].v = value; +} + +inline const char *Registers_ppc64::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "ip"; + case UNW_REG_SP: + return "sp"; + case UNW_PPC64_R0: + return "r0"; + case UNW_PPC64_R1: + return "r1"; + case UNW_PPC64_R2: + return "r2"; + case 
UNW_PPC64_R3: + return "r3"; + case UNW_PPC64_R4: + return "r4"; + case UNW_PPC64_R5: + return "r5"; + case UNW_PPC64_R6: + return "r6"; + case UNW_PPC64_R7: + return "r7"; + case UNW_PPC64_R8: + return "r8"; + case UNW_PPC64_R9: + return "r9"; + case UNW_PPC64_R10: + return "r10"; + case UNW_PPC64_R11: + return "r11"; + case UNW_PPC64_R12: + return "r12"; + case UNW_PPC64_R13: + return "r13"; + case UNW_PPC64_R14: + return "r14"; + case UNW_PPC64_R15: + return "r15"; + case UNW_PPC64_R16: + return "r16"; + case UNW_PPC64_R17: + return "r17"; + case UNW_PPC64_R18: + return "r18"; + case UNW_PPC64_R19: + return "r19"; + case UNW_PPC64_R20: + return "r20"; + case UNW_PPC64_R21: + return "r21"; + case UNW_PPC64_R22: + return "r22"; + case UNW_PPC64_R23: + return "r23"; + case UNW_PPC64_R24: + return "r24"; + case UNW_PPC64_R25: + return "r25"; + case UNW_PPC64_R26: + return "r26"; + case UNW_PPC64_R27: + return "r27"; + case UNW_PPC64_R28: + return "r28"; + case UNW_PPC64_R29: + return "r29"; + case UNW_PPC64_R30: + return "r30"; + case UNW_PPC64_R31: + return "r31"; + case UNW_PPC64_CR0: + return "cr0"; + case UNW_PPC64_CR1: + return "cr1"; + case UNW_PPC64_CR2: + return "cr2"; + case UNW_PPC64_CR3: + return "cr3"; + case UNW_PPC64_CR4: + return "cr4"; + case UNW_PPC64_CR5: + return "cr5"; + case UNW_PPC64_CR6: + return "cr6"; + case UNW_PPC64_CR7: + return "cr7"; + case UNW_PPC64_XER: + return "xer"; + case UNW_PPC64_LR: + return "lr"; + case UNW_PPC64_CTR: + return "ctr"; + case UNW_PPC64_VRSAVE: + return "vrsave"; + case UNW_PPC64_F0: + return "fp0"; + case UNW_PPC64_F1: + return "fp1"; + case UNW_PPC64_F2: + return "fp2"; + case UNW_PPC64_F3: + return "fp3"; + case UNW_PPC64_F4: + return "fp4"; + case UNW_PPC64_F5: + return "fp5"; + case UNW_PPC64_F6: + return "fp6"; + case UNW_PPC64_F7: + return "fp7"; + case UNW_PPC64_F8: + return "fp8"; + case UNW_PPC64_F9: + return "fp9"; + case UNW_PPC64_F10: + return "fp10"; + case UNW_PPC64_F11: + return "fp11"; + case UNW_PPC64_F12: + return "fp12"; + case UNW_PPC64_F13: + return "fp13"; + case UNW_PPC64_F14: + return "fp14"; + case UNW_PPC64_F15: + return "fp15"; + case UNW_PPC64_F16: + return "fp16"; + case UNW_PPC64_F17: + return "fp17"; + case UNW_PPC64_F18: + return "fp18"; + case UNW_PPC64_F19: + return "fp19"; + case UNW_PPC64_F20: + return "fp20"; + case UNW_PPC64_F21: + return "fp21"; + case UNW_PPC64_F22: + return "fp22"; + case UNW_PPC64_F23: + return "fp23"; + case UNW_PPC64_F24: + return "fp24"; + case UNW_PPC64_F25: + return "fp25"; + case UNW_PPC64_F26: + return "fp26"; + case UNW_PPC64_F27: + return "fp27"; + case UNW_PPC64_F28: + return "fp28"; + case UNW_PPC64_F29: + return "fp29"; + case UNW_PPC64_F30: + return "fp30"; + case UNW_PPC64_F31: + return "fp31"; + case UNW_PPC64_V0: + return "v0"; + case UNW_PPC64_V1: + return "v1"; + case UNW_PPC64_V2: + return "v2"; + case UNW_PPC64_V3: + return "v3"; + case UNW_PPC64_V4: + return "v4"; + case UNW_PPC64_V5: + return "v5"; + case UNW_PPC64_V6: + return "v6"; + case UNW_PPC64_V7: + return "v7"; + case UNW_PPC64_V8: + return "v8"; + case UNW_PPC64_V9: + return "v9"; + case UNW_PPC64_V10: + return "v10"; + case UNW_PPC64_V11: + return "v11"; + case UNW_PPC64_V12: + return "v12"; + case UNW_PPC64_V13: + return "v13"; + case UNW_PPC64_V14: + return "v14"; + case UNW_PPC64_V15: + return "v15"; + case UNW_PPC64_V16: + return "v16"; + case UNW_PPC64_V17: + return "v17"; + case UNW_PPC64_V18: + return "v18"; + case UNW_PPC64_V19: + return "v19"; + case UNW_PPC64_V20: + return "v20"; + case 
UNW_PPC64_V21: + return "v21"; + case UNW_PPC64_V22: + return "v22"; + case UNW_PPC64_V23: + return "v23"; + case UNW_PPC64_V24: + return "v24"; + case UNW_PPC64_V25: + return "v25"; + case UNW_PPC64_V26: + return "v26"; + case UNW_PPC64_V27: + return "v27"; + case UNW_PPC64_V28: + return "v28"; + case UNW_PPC64_V29: + return "v29"; + case UNW_PPC64_V30: + return "v30"; + case UNW_PPC64_V31: + return "v31"; + } + return "unknown register"; +} +#endif // _LIBUNWIND_TARGET_PPC64 + + +#if defined(_LIBUNWIND_TARGET_AARCH64) +/// Registers_arm64 holds the register state of a thread in a 64-bit arm +/// process. +class _LIBUNWIND_HIDDEN Registers_arm64 { +public: + Registers_arm64(); + Registers_arm64(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + uint64_t getRegisterLocation(int regNum) const; + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } + static int getArch() { return REGISTERS_ARM64; } + + uint64_t getSP() const { return _registers.__sp; } + void setSP(uint64_t value, uint64_t location) { _registers.__sp = value; } + uint64_t getIP() const { return _registers.__pc; } + void setIP(uint64_t value, uint64_t location) { _registers.__pc = value; } + uint64_t getFP() const { return _registers.__fp; } + void setFP(uint64_t value, uint64_t location) { _registers.__fp = value; } + +private: + struct GPRs { + uint64_t __x[29]; // x0-x28 + uint64_t __fp; // Frame pointer x29 + uint64_t __lr; // Link register x30 + uint64_t __sp; // Stack pointer x31 + uint64_t __pc; // Program counter + uint64_t __ra_sign_state; // RA sign state register + }; + + struct GPRLocations { + uint64_t __x[29]; // x0-x28 + uint64_t __fp; // Frame pointer x29 + uint64_t __lr; // Link register x30 + uint64_t __sp; // Stack pointer x31 + uint64_t __pc; // Program counter + uint64_t padding; // 16-byte align + }; + + GPRs _registers; + GPRLocations _registerLocations; + double _vectorHalfRegisters[32]; + // Currently only the lower double in 128-bit vectore registers + // is perserved during unwinding. We could define new register + // numbers (> 96) which mean whole vector registers, then this + // struct would need to change to contain whole vector registers. 
+}; + +inline Registers_arm64::Registers_arm64(const void *registers) { + static_assert((check_fit::does_fit), + "arm64 registers do not fit into unw_context_t"); + memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); + static_assert( + sizeof(GPRs) == 0x110, + "expected VFP registers to be at offset 272"); + memcpy(_vectorHalfRegisters, + static_cast(registers) + sizeof(GPRs), + sizeof(_vectorHalfRegisters)); +} + +inline Registers_arm64::Registers_arm64() { + memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); + memset(&_vectorHalfRegisters, 0, sizeof(_vectorHalfRegisters)); +} + +inline bool Registers_arm64::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum > 95) + return false; + if (regNum == UNW_ARM64_RA_SIGN_STATE) + return true; + if ((regNum > 31) && (regNum < 64)) + return false; + return true; +} + +inline uint64_t Registers_arm64::getRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return _registers.__pc; + if (regNum == UNW_REG_SP) + return _registers.__sp; + if (regNum == UNW_ARM64_RA_SIGN_STATE) + return _registers.__ra_sign_state; + if ((regNum >= 0) && (regNum < 32)) + return _registers.__x[regNum]; + _LIBUNWIND_ABORT("unsupported arm64 register"); +} + +inline void Registers_arm64::setRegister(int regNum, uint64_t value, uint64_t location) { + if (regNum == UNW_REG_IP) { + _registers.__pc = value; + _registerLocations.__pc = location; + } + else if (regNum == UNW_REG_SP) { + _registers.__sp = value; + _registerLocations.__sp = location; + } + else if (regNum == UNW_ARM64_RA_SIGN_STATE) + _registers.__ra_sign_state = value; + else if ((regNum >= 0) && (regNum < 32)) { + _registers.__x[regNum] = value; + _registerLocations.__x[regNum] = location; + } + else + _LIBUNWIND_ABORT("unsupported arm64 register"); +} + +inline uint64_t Registers_arm64::getRegisterLocation(int regNum) const { + if (regNum == UNW_REG_IP) + return _registerLocations.__pc; + if (regNum == UNW_REG_SP) + return _registerLocations.__sp; + if ((regNum >= 0) && (regNum < 32)) + return _registerLocations.__x[regNum]; + _LIBUNWIND_ABORT("unsupported arm64 register"); +} + +inline const char *Registers_arm64::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "pc"; + case UNW_REG_SP: + return "sp"; + case UNW_ARM64_X0: + return "x0"; + case UNW_ARM64_X1: + return "x1"; + case UNW_ARM64_X2: + return "x2"; + case UNW_ARM64_X3: + return "x3"; + case UNW_ARM64_X4: + return "x4"; + case UNW_ARM64_X5: + return "x5"; + case UNW_ARM64_X6: + return "x6"; + case UNW_ARM64_X7: + return "x7"; + case UNW_ARM64_X8: + return "x8"; + case UNW_ARM64_X9: + return "x9"; + case UNW_ARM64_X10: + return "x10"; + case UNW_ARM64_X11: + return "x11"; + case UNW_ARM64_X12: + return "x12"; + case UNW_ARM64_X13: + return "x13"; + case UNW_ARM64_X14: + return "x14"; + case UNW_ARM64_X15: + return "x15"; + case UNW_ARM64_X16: + return "x16"; + case UNW_ARM64_X17: + return "x17"; + case UNW_ARM64_X18: + return "x18"; + case UNW_ARM64_X19: + return "x19"; + case UNW_ARM64_X20: + return "x20"; + case UNW_ARM64_X21: + return "x21"; + case UNW_ARM64_X22: + return "x22"; + case UNW_ARM64_X23: + return "x23"; + case UNW_ARM64_X24: + return "x24"; + case UNW_ARM64_X25: + return "x25"; + case UNW_ARM64_X26: + return "x26"; + case UNW_ARM64_X27: + return "x27"; + case UNW_ARM64_X28: + 
return "x28"; + case UNW_ARM64_X29: + return "fp"; + case UNW_ARM64_X30: + return "lr"; + case UNW_ARM64_X31: + return "sp"; + case UNW_ARM64_D0: + return "d0"; + case UNW_ARM64_D1: + return "d1"; + case UNW_ARM64_D2: + return "d2"; + case UNW_ARM64_D3: + return "d3"; + case UNW_ARM64_D4: + return "d4"; + case UNW_ARM64_D5: + return "d5"; + case UNW_ARM64_D6: + return "d6"; + case UNW_ARM64_D7: + return "d7"; + case UNW_ARM64_D8: + return "d8"; + case UNW_ARM64_D9: + return "d9"; + case UNW_ARM64_D10: + return "d10"; + case UNW_ARM64_D11: + return "d11"; + case UNW_ARM64_D12: + return "d12"; + case UNW_ARM64_D13: + return "d13"; + case UNW_ARM64_D14: + return "d14"; + case UNW_ARM64_D15: + return "d15"; + case UNW_ARM64_D16: + return "d16"; + case UNW_ARM64_D17: + return "d17"; + case UNW_ARM64_D18: + return "d18"; + case UNW_ARM64_D19: + return "d19"; + case UNW_ARM64_D20: + return "d20"; + case UNW_ARM64_D21: + return "d21"; + case UNW_ARM64_D22: + return "d22"; + case UNW_ARM64_D23: + return "d23"; + case UNW_ARM64_D24: + return "d24"; + case UNW_ARM64_D25: + return "d25"; + case UNW_ARM64_D26: + return "d26"; + case UNW_ARM64_D27: + return "d27"; + case UNW_ARM64_D28: + return "d28"; + case UNW_ARM64_D29: + return "d29"; + case UNW_ARM64_D30: + return "d30"; + case UNW_ARM64_D31: + return "d31"; + default: + return "unknown register"; + } +} + +inline bool Registers_arm64::validFloatRegister(int regNum) const { + if (regNum < UNW_ARM64_D0) + return false; + if (regNum > UNW_ARM64_D31) + return false; + return true; +} + +inline double Registers_arm64::getFloatRegister(int regNum) const { + assert(validFloatRegister(regNum)); + return _vectorHalfRegisters[regNum - UNW_ARM64_D0]; +} + +inline void Registers_arm64::setFloatRegister(int regNum, double value) { + assert(validFloatRegister(regNum)); + _vectorHalfRegisters[regNum - UNW_ARM64_D0] = value; +} + +inline bool Registers_arm64::validVectorRegister(int) const { + return false; +} + +inline v128 Registers_arm64::getVectorRegister(int) const { + _LIBUNWIND_ABORT("no arm64 vector register support yet"); +} + +inline void Registers_arm64::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("no arm64 vector register support yet"); +} +#endif // _LIBUNWIND_TARGET_AARCH64 + +#if defined(_LIBUNWIND_TARGET_ARM) +/// Registers_arm holds the register state of a thread in a 32-bit arm +/// process. +/// +/// NOTE: Assumes VFPv3. On ARM processors without a floating point unit, +/// this uses more memory than required. 
+class _LIBUNWIND_HIDDEN Registers_arm { +public: + Registers_arm(); + Registers_arm(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value, uint32_t location); + uint32_t getRegisterLocation(int num) const; + bool validFloatRegister(int num) const; + unw_fpreg_t getFloatRegister(int num); + void setFloatRegister(int num, unw_fpreg_t value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto() { + restoreSavedFloatRegisters(); + restoreCoreAndJumpTo(); + } + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM; } + static int getArch() { return REGISTERS_ARM; } + + uint32_t getSP() const { return _registers.__sp; } + void setSP(uint32_t value, uint32_t location) { _registers.__sp = value; _registerLocations.__sp = location; } + uint32_t getIP() const { return _registers.__pc; } + void setIP(uint32_t value, uint32_t location) { _registers.__pc = value; _registerLocations.__pc = location; } + + void saveVFPAsX() { + assert(_use_X_for_vfp_save || !_saved_vfp_d0_d15); + _use_X_for_vfp_save = true; + } + + void restoreSavedFloatRegisters() { + if (_saved_vfp_d0_d15) { + if (_use_X_for_vfp_save) + restoreVFPWithFLDMX(_vfp_d0_d15_pad); + else + restoreVFPWithFLDMD(_vfp_d0_d15_pad); + } + if (_saved_vfp_d16_d31) + restoreVFPv3(_vfp_d16_d31); +#if defined(__ARM_WMMX) + if (_saved_iwmmx) + restoreiWMMX(_iwmmx); + if (_saved_iwmmx_control) + restoreiWMMXControl(_iwmmx_control); +#endif + } + +private: + struct GPRs { + uint32_t __r[13]; // r0-r12 + uint32_t __sp; // Stack pointer r13 + uint32_t __lr; // Link register r14 + uint32_t __pc; // Program counter r15 + }; + + struct GPRLocations { + uint32_t __r[13]; // r0-r12 + uint32_t __sp; // Stack pointer r13 + uint32_t __lr; // Link register r14 + uint32_t __pc; // Program counter r15 + }; + + static void saveVFPWithFSTMD(void *); + static void saveVFPWithFSTMX(void*); + static void saveVFPv3(void*); + static void restoreVFPWithFLDMD(void*); + static void restoreVFPWithFLDMX(void*); + static void restoreVFPv3(void*); +#if defined(__ARM_WMMX) + static void saveiWMMX(void*); + static void saveiWMMXControl(uint32_t*); + static void restoreiWMMX(void*); + static void restoreiWMMXControl(uint32_t*); +#endif + void restoreCoreAndJumpTo(); + + // ARM registers + GPRs _registers; + GPRLocations _registerLocations; + + // We save floating point registers lazily because we can't know ahead of + // time which ones are used. See EHABI #4.7. + + // Whether D0-D15 are saved in the FTSMX instead of FSTMD format. + // + // See EHABI #7.5 that explains how matching instruction sequences for load + // and store need to be used to correctly restore the exact register bits. + bool _use_X_for_vfp_save; + // Whether VFP D0-D15 are saved. + bool _saved_vfp_d0_d15; + // Whether VFPv3 D16-D31 are saved. + bool _saved_vfp_d16_d31; + // VFP registers D0-D15, + padding if saved using FSTMX + unw_fpreg_t _vfp_d0_d15_pad[17]; + // VFPv3 registers D16-D31, always saved using FSTMD + unw_fpreg_t _vfp_d16_d31[16]; +#if defined(__ARM_WMMX) + // Whether iWMMX data registers are saved. + bool _saved_iwmmx; + // Whether iWMMX control registers are saved. 
+ mutable bool _saved_iwmmx_control; + // iWMMX registers + unw_fpreg_t _iwmmx[16]; + // iWMMX control registers + mutable uint32_t _iwmmx_control[4]; +#endif +}; + +inline Registers_arm::Registers_arm(const void *registers) + : _use_X_for_vfp_save(false), + _saved_vfp_d0_d15(false), + _saved_vfp_d16_d31(false) { + static_assert((check_fit::does_fit), + "arm registers do not fit into unw_context_t"); + // See __unw_getcontext() note about data. + memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); + memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad)); + memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31)); +#if defined(__ARM_WMMX) + _saved_iwmmx = false; + _saved_iwmmx_control = false; + memset(&_iwmmx, 0, sizeof(_iwmmx)); + memset(&_iwmmx_control, 0, sizeof(_iwmmx_control)); +#endif +} + +inline Registers_arm::Registers_arm() + : _use_X_for_vfp_save(false), + _saved_vfp_d0_d15(false), + _saved_vfp_d16_d31(false) { + memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); + memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad)); + memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31)); +#if defined(__ARM_WMMX) + _saved_iwmmx = false; + _saved_iwmmx_control = false; + memset(&_iwmmx, 0, sizeof(_iwmmx)); + memset(&_iwmmx_control, 0, sizeof(_iwmmx_control)); +#endif +} + +inline bool Registers_arm::validRegister(int regNum) const { + // Returns true for all non-VFP registers supported by the EHABI + // virtual register set (VRS). + if (regNum == UNW_REG_IP) + return true; + + if (regNum == UNW_REG_SP) + return true; + + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R15) + return true; + +#if defined(__ARM_WMMX) + if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) + return true; +#endif + + return false; +} + +inline uint32_t Registers_arm::getRegister(int regNum) const { + if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) + return _registers.__sp; + + if (regNum == UNW_ARM_LR) + return _registers.__lr; + + if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) + return _registers.__pc; + + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12) + return _registers.__r[regNum]; + +#if defined(__ARM_WMMX) + if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) { + if (!_saved_iwmmx_control) { + _saved_iwmmx_control = true; + saveiWMMXControl(_iwmmx_control); + } + return _iwmmx_control[regNum - UNW_ARM_WC0]; + } +#endif + + _LIBUNWIND_ABORT("unsupported arm register"); +} + +inline void Registers_arm::setRegister(int regNum, uint32_t value, uint32_t location) { + if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) { + _registers.__sp = value; + _registerLocations.__sp = location; + return; + } + + if (regNum == UNW_ARM_LR) { + _registers.__lr = value; + _registerLocations.__lr = location; + return; + } + + if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) { + _registers.__pc = value; + _registerLocations.__pc = location; + return; + } + + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12) { + _registers.__r[regNum] = value; + _registerLocations.__r[regNum] = location; + return; + } + +#if defined(__ARM_WMMX) + if (regNum >= UNW_ARM_WC0 && regNum <= UNW_ARM_WC3) { + if (!_saved_iwmmx_control) { + _saved_iwmmx_control = true; + saveiWMMXControl(_iwmmx_control); + } + _iwmmx_control[regNum - UNW_ARM_WC0] = value; + return; + } +#endif + + _LIBUNWIND_ABORT("unsupported arm register"); +} + +inline uint32_t Registers_arm::getRegisterLocation(int regNum) const { + if (regNum == UNW_REG_SP || regNum == UNW_ARM_SP) + 
return _registerLocations.__sp; + + if (regNum == UNW_ARM_LR) + return _registerLocations.__lr; + + if (regNum == UNW_REG_IP || regNum == UNW_ARM_IP) + return _registerLocations.__pc; + + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R12) + return _registerLocations.__r[regNum]; + + _LIBUNWIND_ABORT("unsupported arm register"); +} + +inline const char *Registers_arm::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + case UNW_ARM_IP: // UNW_ARM_R15 is alias + return "pc"; + case UNW_ARM_LR: // UNW_ARM_R14 is alias + return "lr"; + case UNW_REG_SP: + case UNW_ARM_SP: // UNW_ARM_R13 is alias + return "sp"; + case UNW_ARM_R0: + return "r0"; + case UNW_ARM_R1: + return "r1"; + case UNW_ARM_R2: + return "r2"; + case UNW_ARM_R3: + return "r3"; + case UNW_ARM_R4: + return "r4"; + case UNW_ARM_R5: + return "r5"; + case UNW_ARM_R6: + return "r6"; + case UNW_ARM_R7: + return "r7"; + case UNW_ARM_R8: + return "r8"; + case UNW_ARM_R9: + return "r9"; + case UNW_ARM_R10: + return "r10"; + case UNW_ARM_R11: + return "r11"; + case UNW_ARM_R12: + return "r12"; + case UNW_ARM_S0: + return "s0"; + case UNW_ARM_S1: + return "s1"; + case UNW_ARM_S2: + return "s2"; + case UNW_ARM_S3: + return "s3"; + case UNW_ARM_S4: + return "s4"; + case UNW_ARM_S5: + return "s5"; + case UNW_ARM_S6: + return "s6"; + case UNW_ARM_S7: + return "s7"; + case UNW_ARM_S8: + return "s8"; + case UNW_ARM_S9: + return "s9"; + case UNW_ARM_S10: + return "s10"; + case UNW_ARM_S11: + return "s11"; + case UNW_ARM_S12: + return "s12"; + case UNW_ARM_S13: + return "s13"; + case UNW_ARM_S14: + return "s14"; + case UNW_ARM_S15: + return "s15"; + case UNW_ARM_S16: + return "s16"; + case UNW_ARM_S17: + return "s17"; + case UNW_ARM_S18: + return "s18"; + case UNW_ARM_S19: + return "s19"; + case UNW_ARM_S20: + return "s20"; + case UNW_ARM_S21: + return "s21"; + case UNW_ARM_S22: + return "s22"; + case UNW_ARM_S23: + return "s23"; + case UNW_ARM_S24: + return "s24"; + case UNW_ARM_S25: + return "s25"; + case UNW_ARM_S26: + return "s26"; + case UNW_ARM_S27: + return "s27"; + case UNW_ARM_S28: + return "s28"; + case UNW_ARM_S29: + return "s29"; + case UNW_ARM_S30: + return "s30"; + case UNW_ARM_S31: + return "s31"; + case UNW_ARM_D0: + return "d0"; + case UNW_ARM_D1: + return "d1"; + case UNW_ARM_D2: + return "d2"; + case UNW_ARM_D3: + return "d3"; + case UNW_ARM_D4: + return "d4"; + case UNW_ARM_D5: + return "d5"; + case UNW_ARM_D6: + return "d6"; + case UNW_ARM_D7: + return "d7"; + case UNW_ARM_D8: + return "d8"; + case UNW_ARM_D9: + return "d9"; + case UNW_ARM_D10: + return "d10"; + case UNW_ARM_D11: + return "d11"; + case UNW_ARM_D12: + return "d12"; + case UNW_ARM_D13: + return "d13"; + case UNW_ARM_D14: + return "d14"; + case UNW_ARM_D15: + return "d15"; + case UNW_ARM_D16: + return "d16"; + case UNW_ARM_D17: + return "d17"; + case UNW_ARM_D18: + return "d18"; + case UNW_ARM_D19: + return "d19"; + case UNW_ARM_D20: + return "d20"; + case UNW_ARM_D21: + return "d21"; + case UNW_ARM_D22: + return "d22"; + case UNW_ARM_D23: + return "d23"; + case UNW_ARM_D24: + return "d24"; + case UNW_ARM_D25: + return "d25"; + case UNW_ARM_D26: + return "d26"; + case UNW_ARM_D27: + return "d27"; + case UNW_ARM_D28: + return "d28"; + case UNW_ARM_D29: + return "d29"; + case UNW_ARM_D30: + return "d30"; + case UNW_ARM_D31: + return "d31"; + default: + return "unknown register"; + } +} + +inline bool Registers_arm::validFloatRegister(int regNum) const { + // NOTE: Consider the intel MMX registers floating points so the + // __unw_get_fpreg can be 
used to transmit the 64-bit data back. + return ((regNum >= UNW_ARM_D0) && (regNum <= UNW_ARM_D31)) +#if defined(__ARM_WMMX) + || ((regNum >= UNW_ARM_WR0) && (regNum <= UNW_ARM_WR15)) +#endif + ; +} + +inline unw_fpreg_t Registers_arm::getFloatRegister(int regNum) { + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) { + if (!_saved_vfp_d0_d15) { + _saved_vfp_d0_d15 = true; + if (_use_X_for_vfp_save) + saveVFPWithFSTMX(_vfp_d0_d15_pad); + else + saveVFPWithFSTMD(_vfp_d0_d15_pad); + } + return _vfp_d0_d15_pad[regNum - UNW_ARM_D0]; + } + + if (regNum >= UNW_ARM_D16 && regNum <= UNW_ARM_D31) { + if (!_saved_vfp_d16_d31) { + _saved_vfp_d16_d31 = true; + saveVFPv3(_vfp_d16_d31); + } + return _vfp_d16_d31[regNum - UNW_ARM_D16]; + } + +#if defined(__ARM_WMMX) + if (regNum >= UNW_ARM_WR0 && regNum <= UNW_ARM_WR15) { + if (!_saved_iwmmx) { + _saved_iwmmx = true; + saveiWMMX(_iwmmx); + } + return _iwmmx[regNum - UNW_ARM_WR0]; + } +#endif + + _LIBUNWIND_ABORT("Unknown ARM float register"); +} + +inline void Registers_arm::setFloatRegister(int regNum, unw_fpreg_t value) { + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D15) { + if (!_saved_vfp_d0_d15) { + _saved_vfp_d0_d15 = true; + if (_use_X_for_vfp_save) + saveVFPWithFSTMX(_vfp_d0_d15_pad); + else + saveVFPWithFSTMD(_vfp_d0_d15_pad); + } + _vfp_d0_d15_pad[regNum - UNW_ARM_D0] = value; + return; + } + + if (regNum >= UNW_ARM_D16 && regNum <= UNW_ARM_D31) { + if (!_saved_vfp_d16_d31) { + _saved_vfp_d16_d31 = true; + saveVFPv3(_vfp_d16_d31); + } + _vfp_d16_d31[regNum - UNW_ARM_D16] = value; + return; + } + +#if defined(__ARM_WMMX) + if (regNum >= UNW_ARM_WR0 && regNum <= UNW_ARM_WR15) { + if (!_saved_iwmmx) { + _saved_iwmmx = true; + saveiWMMX(_iwmmx); + } + _iwmmx[regNum - UNW_ARM_WR0] = value; + return; + } +#endif + + _LIBUNWIND_ABORT("Unknown ARM float register"); +} + +inline bool Registers_arm::validVectorRegister(int) const { + return false; +} + +inline v128 Registers_arm::getVectorRegister(int) const { + _LIBUNWIND_ABORT("ARM vector support not implemented"); +} + +inline void Registers_arm::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("ARM vector support not implemented"); +} +#endif // _LIBUNWIND_TARGET_ARM + + +#if defined(_LIBUNWIND_TARGET_OR1K) +/// Registers_or1k holds the register state of a thread in an OpenRISC1000 +/// process. 
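Before the OpenRISC class below, it is worth spelling out the lazy floating-point handling in Registers_arm above: the VFP bank is only snapshotted the first time a D register is read or written, and the class remembers whether the FSTMX or FSTMD store form was used so that restoreSavedFloatRegisters can replay the matching load form (EHABI #7.5 requires the save/restore pair to match for the bits to survive exactly). A stripped-down sketch of that pattern, with the assembly helpers stubbed out as no-ops:

#include <cstdint>

// Placeholders for the real FSTMD/FSTMX and FLDMD/FLDMX wrappers.
static void saveD0D15(uint64_t * /*buf*/, bool /*useXFormat*/) {}
static void restoreD0D15(uint64_t * /*buf*/, bool /*useXFormat*/) {}

class LazyVFPSketch {
public:
  uint64_t read(int d) {
    ensureSaved();
    return _d0_d15[d];
  }
  void write(int d, uint64_t bits) {
    ensureSaved();                         // snapshot first so a later restore
    _d0_d15[d] = bits;                     // does not clobber the other regs
  }
  void preferXFormat() { _useXFormat = true; }   // analogue of saveVFPAsX()
  void restoreIfSaved() {
    if (_saved)
      restoreD0D15(_d0_d15, _useXFormat);  // must match the format used to save
  }

private:
  void ensureSaved() {
    if (!_saved) {
      _saved = true;
      saveD0D15(_d0_d15, _useXFormat);
    }
  }
  bool _saved = false;
  bool _useXFormat = false;
  uint64_t _d0_d15[16] = {};               // the real class adds a pad slot for FSTMX
};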
+class _LIBUNWIND_HIDDEN Registers_or1k { +public: + Registers_or1k(); + Registers_or1k(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_OR1K; } + static int getArch() { return REGISTERS_OR1K; } + + uint64_t getSP() const { return _registers.__r[1]; } + void setSP(uint32_t value) { _registers.__r[1] = value; } + uint64_t getIP() const { return _registers.__pc; } + void setIP(uint32_t value) { _registers.__pc = value; } + +private: + struct or1k_thread_state_t { + unsigned int __r[32]; // r0-r31 + unsigned int __pc; // Program counter + unsigned int __epcr; // Program counter at exception + }; + + or1k_thread_state_t _registers; +}; + +inline Registers_or1k::Registers_or1k(const void *registers) { + static_assert((check_fit::does_fit), + "or1k registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); +} + +inline Registers_or1k::Registers_or1k() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_or1k::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_OR1K_R31) + return true; + if (regNum == UNW_OR1K_EPCR) + return true; + return false; +} + +inline uint32_t Registers_or1k::getRegister(int regNum) const { + if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31) + return _registers.__r[regNum - UNW_OR1K_R0]; + + switch (regNum) { + case UNW_REG_IP: + return _registers.__pc; + case UNW_REG_SP: + return _registers.__r[1]; + case UNW_OR1K_EPCR: + return _registers.__epcr; + } + _LIBUNWIND_ABORT("unsupported or1k register"); +} + +inline void Registers_or1k::setRegister(int regNum, uint32_t value) { + if (regNum >= UNW_OR1K_R0 && regNum <= UNW_OR1K_R31) { + _registers.__r[regNum - UNW_OR1K_R0] = value; + return; + } + + switch (regNum) { + case UNW_REG_IP: + _registers.__pc = value; + return; + case UNW_REG_SP: + _registers.__r[1] = value; + return; + case UNW_OR1K_EPCR: + _registers.__epcr = value; + return; + } + _LIBUNWIND_ABORT("unsupported or1k register"); +} + +inline bool Registers_or1k::validFloatRegister(int /* regNum */) const { + return false; +} + +inline double Registers_or1k::getFloatRegister(int /* regNum */) const { + _LIBUNWIND_ABORT("or1k float support not implemented"); +} + +inline void Registers_or1k::setFloatRegister(int /* regNum */, + double /* value */) { + _LIBUNWIND_ABORT("or1k float support not implemented"); +} + +inline bool Registers_or1k::validVectorRegister(int /* regNum */) const { + return false; +} + +inline v128 Registers_or1k::getVectorRegister(int /* regNum */) const { + _LIBUNWIND_ABORT("or1k vector support not implemented"); +} + +inline void Registers_or1k::setVectorRegister(int /* regNum */, v128 /* value */) { + _LIBUNWIND_ABORT("or1k vector support not implemented"); +} + +inline const char *Registers_or1k::getRegisterName(int regNum) { + switch (regNum) { + case UNW_OR1K_R0: + return "r0"; + case UNW_OR1K_R1: + return "r1"; + case UNW_OR1K_R2: + return "r2"; + case 
UNW_OR1K_R3: + return "r3"; + case UNW_OR1K_R4: + return "r4"; + case UNW_OR1K_R5: + return "r5"; + case UNW_OR1K_R6: + return "r6"; + case UNW_OR1K_R7: + return "r7"; + case UNW_OR1K_R8: + return "r8"; + case UNW_OR1K_R9: + return "r9"; + case UNW_OR1K_R10: + return "r10"; + case UNW_OR1K_R11: + return "r11"; + case UNW_OR1K_R12: + return "r12"; + case UNW_OR1K_R13: + return "r13"; + case UNW_OR1K_R14: + return "r14"; + case UNW_OR1K_R15: + return "r15"; + case UNW_OR1K_R16: + return "r16"; + case UNW_OR1K_R17: + return "r17"; + case UNW_OR1K_R18: + return "r18"; + case UNW_OR1K_R19: + return "r19"; + case UNW_OR1K_R20: + return "r20"; + case UNW_OR1K_R21: + return "r21"; + case UNW_OR1K_R22: + return "r22"; + case UNW_OR1K_R23: + return "r23"; + case UNW_OR1K_R24: + return "r24"; + case UNW_OR1K_R25: + return "r25"; + case UNW_OR1K_R26: + return "r26"; + case UNW_OR1K_R27: + return "r27"; + case UNW_OR1K_R28: + return "r28"; + case UNW_OR1K_R29: + return "r29"; + case UNW_OR1K_R30: + return "r30"; + case UNW_OR1K_R31: + return "r31"; + case UNW_OR1K_EPCR: + return "EPCR"; + default: + return "unknown register"; + } + +} +#endif // _LIBUNWIND_TARGET_OR1K + +#if defined(_LIBUNWIND_TARGET_MIPS_O32) +/// Registers_mips_o32 holds the register state of a thread in a 32-bit MIPS +/// process. +class _LIBUNWIND_HIDDEN Registers_mips_o32 { +public: + Registers_mips_o32(); + Registers_mips_o32(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_MIPS; } + static int getArch() { return REGISTERS_MIPS_O32; } + + uint32_t getSP() const { return _registers.__r[29]; } + void setSP(uint32_t value) { _registers.__r[29] = value; } + uint32_t getIP() const { return _registers.__pc; } + void setIP(uint32_t value) { _registers.__pc = value; } + +private: + struct mips_o32_thread_state_t { + uint32_t __r[32]; + uint32_t __pc; + uint32_t __hi; + uint32_t __lo; + }; + + mips_o32_thread_state_t _registers; +#ifdef __mips_hard_float + /// O32 with 32-bit floating point registers only uses half of this + /// space. However, using the same layout for 32-bit vs 64-bit + /// floating point registers results in a single context size for + /// O32 with hard float. 
+ uint32_t _padding; + double _floats[32]; +#endif +}; + +inline Registers_mips_o32::Registers_mips_o32(const void *registers) { + static_assert((check_fit::does_fit), + "mips_o32 registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); +} + +inline Registers_mips_o32::Registers_mips_o32() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_mips_o32::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_MIPS_R31) + return true; +#if __mips_isa_rev != 6 + if (regNum == UNW_MIPS_HI) + return true; + if (regNum == UNW_MIPS_LO) + return true; +#endif +#if defined(__mips_hard_float) && __mips_fpr == 32 + if (regNum >= UNW_MIPS_F0 && regNum <= UNW_MIPS_F31) + return true; +#endif + // FIXME: DSP accumulator registers, MSA registers + return false; +} + +inline uint32_t Registers_mips_o32::getRegister(int regNum) const { + if (regNum >= UNW_MIPS_R0 && regNum <= UNW_MIPS_R31) + return _registers.__r[regNum - UNW_MIPS_R0]; +#if defined(__mips_hard_float) && __mips_fpr == 32 + if (regNum >= UNW_MIPS_F0 && regNum <= UNW_MIPS_F31) { + uint32_t *p; + + if (regNum % 2 == 0) + p = (uint32_t *)&_floats[regNum - UNW_MIPS_F0]; + else + p = (uint32_t *)&_floats[(regNum - 1) - UNW_MIPS_F0] + 1; + return *p; + } +#endif + + switch (regNum) { + case UNW_REG_IP: + return _registers.__pc; + case UNW_REG_SP: + return _registers.__r[29]; + case UNW_MIPS_HI: + return _registers.__hi; + case UNW_MIPS_LO: + return _registers.__lo; + } + _LIBUNWIND_ABORT("unsupported mips_o32 register"); +} + +inline void Registers_mips_o32::setRegister(int regNum, uint32_t value) { + if (regNum >= UNW_MIPS_R0 && regNum <= UNW_MIPS_R31) { + _registers.__r[regNum - UNW_MIPS_R0] = value; + return; + } +#if defined(__mips_hard_float) && __mips_fpr == 32 + if (regNum >= UNW_MIPS_F0 && regNum <= UNW_MIPS_F31) { + uint32_t *p; + + if (regNum % 2 == 0) + p = (uint32_t *)&_floats[regNum - UNW_MIPS_F0]; + else + p = (uint32_t *)&_floats[(regNum - 1) - UNW_MIPS_F0] + 1; + *p = value; + return; + } +#endif + + switch (regNum) { + case UNW_REG_IP: + _registers.__pc = value; + return; + case UNW_REG_SP: + _registers.__r[29] = value; + return; + case UNW_MIPS_HI: + _registers.__hi = value; + return; + case UNW_MIPS_LO: + _registers.__lo = value; + return; + } + _LIBUNWIND_ABORT("unsupported mips_o32 register"); +} + +inline bool Registers_mips_o32::validFloatRegister(int regNum) const { +#if defined(__mips_hard_float) && __mips_fpr == 64 + if (regNum >= UNW_MIPS_F0 && regNum <= UNW_MIPS_F31) + return true; +#endif + return false; +} + +inline double Registers_mips_o32::getFloatRegister(int regNum) const { +#if defined(__mips_hard_float) && __mips_fpr == 64 + assert(validFloatRegister(regNum)); + return _floats[regNum - UNW_MIPS_F0]; +#else + _LIBUNWIND_ABORT("mips_o32 float support not implemented"); +#endif +} + +inline void Registers_mips_o32::setFloatRegister(int regNum, + double value) { +#if defined(__mips_hard_float) && __mips_fpr == 64 + assert(validFloatRegister(regNum)); + _floats[regNum - UNW_MIPS_F0] = value; +#else + _LIBUNWIND_ABORT("mips_o32 float support not implemented"); +#endif +} + +inline bool Registers_mips_o32::validVectorRegister(int /* regNum */) const { + return false; +} + +inline v128 Registers_mips_o32::getVectorRegister(int /* regNum */) const { + _LIBUNWIND_ABORT("mips_o32 vector support not implemented"); +} + +inline void 
Registers_mips_o32::setVectorRegister(int /* regNum */, v128 /* value */) { + _LIBUNWIND_ABORT("mips_o32 vector support not implemented"); +} + +inline const char *Registers_mips_o32::getRegisterName(int regNum) { + switch (regNum) { + case UNW_MIPS_R0: + return "$0"; + case UNW_MIPS_R1: + return "$1"; + case UNW_MIPS_R2: + return "$2"; + case UNW_MIPS_R3: + return "$3"; + case UNW_MIPS_R4: + return "$4"; + case UNW_MIPS_R5: + return "$5"; + case UNW_MIPS_R6: + return "$6"; + case UNW_MIPS_R7: + return "$7"; + case UNW_MIPS_R8: + return "$8"; + case UNW_MIPS_R9: + return "$9"; + case UNW_MIPS_R10: + return "$10"; + case UNW_MIPS_R11: + return "$11"; + case UNW_MIPS_R12: + return "$12"; + case UNW_MIPS_R13: + return "$13"; + case UNW_MIPS_R14: + return "$14"; + case UNW_MIPS_R15: + return "$15"; + case UNW_MIPS_R16: + return "$16"; + case UNW_MIPS_R17: + return "$17"; + case UNW_MIPS_R18: + return "$18"; + case UNW_MIPS_R19: + return "$19"; + case UNW_MIPS_R20: + return "$20"; + case UNW_MIPS_R21: + return "$21"; + case UNW_MIPS_R22: + return "$22"; + case UNW_MIPS_R23: + return "$23"; + case UNW_MIPS_R24: + return "$24"; + case UNW_MIPS_R25: + return "$25"; + case UNW_MIPS_R26: + return "$26"; + case UNW_MIPS_R27: + return "$27"; + case UNW_MIPS_R28: + return "$28"; + case UNW_MIPS_R29: + return "$29"; + case UNW_MIPS_R30: + return "$30"; + case UNW_MIPS_R31: + return "$31"; + case UNW_MIPS_F0: + return "$f0"; + case UNW_MIPS_F1: + return "$f1"; + case UNW_MIPS_F2: + return "$f2"; + case UNW_MIPS_F3: + return "$f3"; + case UNW_MIPS_F4: + return "$f4"; + case UNW_MIPS_F5: + return "$f5"; + case UNW_MIPS_F6: + return "$f6"; + case UNW_MIPS_F7: + return "$f7"; + case UNW_MIPS_F8: + return "$f8"; + case UNW_MIPS_F9: + return "$f9"; + case UNW_MIPS_F10: + return "$f10"; + case UNW_MIPS_F11: + return "$f11"; + case UNW_MIPS_F12: + return "$f12"; + case UNW_MIPS_F13: + return "$f13"; + case UNW_MIPS_F14: + return "$f14"; + case UNW_MIPS_F15: + return "$f15"; + case UNW_MIPS_F16: + return "$f16"; + case UNW_MIPS_F17: + return "$f17"; + case UNW_MIPS_F18: + return "$f18"; + case UNW_MIPS_F19: + return "$f19"; + case UNW_MIPS_F20: + return "$f20"; + case UNW_MIPS_F21: + return "$f21"; + case UNW_MIPS_F22: + return "$f22"; + case UNW_MIPS_F23: + return "$f23"; + case UNW_MIPS_F24: + return "$f24"; + case UNW_MIPS_F25: + return "$f25"; + case UNW_MIPS_F26: + return "$f26"; + case UNW_MIPS_F27: + return "$f27"; + case UNW_MIPS_F28: + return "$f28"; + case UNW_MIPS_F29: + return "$f29"; + case UNW_MIPS_F30: + return "$f30"; + case UNW_MIPS_F31: + return "$f31"; + case UNW_MIPS_HI: + return "$hi"; + case UNW_MIPS_LO: + return "$lo"; + default: + return "unknown register"; + } +} +#endif // _LIBUNWIND_TARGET_MIPS_O32 + +#if defined(_LIBUNWIND_TARGET_MIPS_NEWABI) +/// Registers_mips_newabi holds the register state of a thread in a +/// MIPS process using NEWABI (the N32 or N64 ABIs). 
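One detail of the O32 class above worth spelling out before the NEWABI variant below: when the target uses 32-bit floating-point registers (__mips_fpr == 32), the class still reserves a full double per slot, and getRegister/setRegister address the individual 32-bit halves with pointer arithmetic. The same mapping as a tiny standalone helper (illustrative names, and the same aliasing caveats as the original):

#include <cstdint>

// 32 double-sized slots, mirroring the _floats array above.
static double floatsSketch[32];

// An even $fN lives in the first word of floatsSketch[N], an odd $fN in the
// second word of floatsSketch[N - 1]; the odd-indexed slots stay unused in
// this mode, which is why only half the space is actually needed.
static uint32_t *o32FprWord(int n) {
  if (n % 2 == 0)
    return reinterpret_cast<uint32_t *>(&floatsSketch[n]);
  return reinterpret_cast<uint32_t *>(&floatsSketch[n - 1]) + 1;
}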
+class _LIBUNWIND_HIDDEN Registers_mips_newabi { +public: + Registers_mips_newabi(); + Registers_mips_newabi(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_MIPS; } + static int getArch() { return REGISTERS_MIPS_NEWABI; } + + uint64_t getSP() const { return _registers.__r[29]; } + void setSP(uint64_t value) { _registers.__r[29] = value; } + uint64_t getIP() const { return _registers.__pc; } + void setIP(uint64_t value) { _registers.__pc = value; } + +private: + struct mips_newabi_thread_state_t { + uint64_t __r[32]; + uint64_t __pc; + uint64_t __hi; + uint64_t __lo; + }; + + mips_newabi_thread_state_t _registers; +#ifdef __mips_hard_float + double _floats[32]; +#endif +}; + +inline Registers_mips_newabi::Registers_mips_newabi(const void *registers) { + static_assert((check_fit::does_fit), + "mips_newabi registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); +} + +inline Registers_mips_newabi::Registers_mips_newabi() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_mips_newabi::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_MIPS_R31) + return true; +#if __mips_isa_rev != 6 + if (regNum == UNW_MIPS_HI) + return true; + if (regNum == UNW_MIPS_LO) + return true; +#endif + // FIXME: Hard float, DSP accumulator registers, MSA registers + return false; +} + +inline uint64_t Registers_mips_newabi::getRegister(int regNum) const { + if (regNum >= UNW_MIPS_R0 && regNum <= UNW_MIPS_R31) + return _registers.__r[regNum - UNW_MIPS_R0]; + + switch (regNum) { + case UNW_REG_IP: + return _registers.__pc; + case UNW_REG_SP: + return _registers.__r[29]; + case UNW_MIPS_HI: + return _registers.__hi; + case UNW_MIPS_LO: + return _registers.__lo; + } + _LIBUNWIND_ABORT("unsupported mips_newabi register"); +} + +inline void Registers_mips_newabi::setRegister(int regNum, uint64_t value) { + if (regNum >= UNW_MIPS_R0 && regNum <= UNW_MIPS_R31) { + _registers.__r[regNum - UNW_MIPS_R0] = value; + return; + } + + switch (regNum) { + case UNW_REG_IP: + _registers.__pc = value; + return; + case UNW_REG_SP: + _registers.__r[29] = value; + return; + case UNW_MIPS_HI: + _registers.__hi = value; + return; + case UNW_MIPS_LO: + _registers.__lo = value; + return; + } + _LIBUNWIND_ABORT("unsupported mips_newabi register"); +} + +inline bool Registers_mips_newabi::validFloatRegister(int regNum) const { +#ifdef __mips_hard_float + if (regNum >= UNW_MIPS_F0 && regNum <= UNW_MIPS_F31) + return true; +#endif + return false; +} + +inline double Registers_mips_newabi::getFloatRegister(int regNum) const { +#ifdef __mips_hard_float + assert(validFloatRegister(regNum)); + return _floats[regNum - UNW_MIPS_F0]; +#else + _LIBUNWIND_ABORT("mips_newabi float support not implemented"); +#endif +} + +inline void Registers_mips_newabi::setFloatRegister(int regNum, + double value) { +#ifdef __mips_hard_float + assert(validFloatRegister(regNum)); + 
_floats[regNum - UNW_MIPS_F0] = value; +#else + _LIBUNWIND_ABORT("mips_newabi float support not implemented"); +#endif +} + +inline bool Registers_mips_newabi::validVectorRegister(int /* regNum */) const { + return false; +} + +inline v128 Registers_mips_newabi::getVectorRegister(int /* regNum */) const { + _LIBUNWIND_ABORT("mips_newabi vector support not implemented"); +} + +inline void Registers_mips_newabi::setVectorRegister(int /* regNum */, v128 /* value */) { + _LIBUNWIND_ABORT("mips_newabi vector support not implemented"); +} + +inline const char *Registers_mips_newabi::getRegisterName(int regNum) { + switch (regNum) { + case UNW_MIPS_R0: + return "$0"; + case UNW_MIPS_R1: + return "$1"; + case UNW_MIPS_R2: + return "$2"; + case UNW_MIPS_R3: + return "$3"; + case UNW_MIPS_R4: + return "$4"; + case UNW_MIPS_R5: + return "$5"; + case UNW_MIPS_R6: + return "$6"; + case UNW_MIPS_R7: + return "$7"; + case UNW_MIPS_R8: + return "$8"; + case UNW_MIPS_R9: + return "$9"; + case UNW_MIPS_R10: + return "$10"; + case UNW_MIPS_R11: + return "$11"; + case UNW_MIPS_R12: + return "$12"; + case UNW_MIPS_R13: + return "$13"; + case UNW_MIPS_R14: + return "$14"; + case UNW_MIPS_R15: + return "$15"; + case UNW_MIPS_R16: + return "$16"; + case UNW_MIPS_R17: + return "$17"; + case UNW_MIPS_R18: + return "$18"; + case UNW_MIPS_R19: + return "$19"; + case UNW_MIPS_R20: + return "$20"; + case UNW_MIPS_R21: + return "$21"; + case UNW_MIPS_R22: + return "$22"; + case UNW_MIPS_R23: + return "$23"; + case UNW_MIPS_R24: + return "$24"; + case UNW_MIPS_R25: + return "$25"; + case UNW_MIPS_R26: + return "$26"; + case UNW_MIPS_R27: + return "$27"; + case UNW_MIPS_R28: + return "$28"; + case UNW_MIPS_R29: + return "$29"; + case UNW_MIPS_R30: + return "$30"; + case UNW_MIPS_R31: + return "$31"; + case UNW_MIPS_F0: + return "$f0"; + case UNW_MIPS_F1: + return "$f1"; + case UNW_MIPS_F2: + return "$f2"; + case UNW_MIPS_F3: + return "$f3"; + case UNW_MIPS_F4: + return "$f4"; + case UNW_MIPS_F5: + return "$f5"; + case UNW_MIPS_F6: + return "$f6"; + case UNW_MIPS_F7: + return "$f7"; + case UNW_MIPS_F8: + return "$f8"; + case UNW_MIPS_F9: + return "$f9"; + case UNW_MIPS_F10: + return "$f10"; + case UNW_MIPS_F11: + return "$f11"; + case UNW_MIPS_F12: + return "$f12"; + case UNW_MIPS_F13: + return "$f13"; + case UNW_MIPS_F14: + return "$f14"; + case UNW_MIPS_F15: + return "$f15"; + case UNW_MIPS_F16: + return "$f16"; + case UNW_MIPS_F17: + return "$f17"; + case UNW_MIPS_F18: + return "$f18"; + case UNW_MIPS_F19: + return "$f19"; + case UNW_MIPS_F20: + return "$f20"; + case UNW_MIPS_F21: + return "$f21"; + case UNW_MIPS_F22: + return "$f22"; + case UNW_MIPS_F23: + return "$f23"; + case UNW_MIPS_F24: + return "$f24"; + case UNW_MIPS_F25: + return "$f25"; + case UNW_MIPS_F26: + return "$f26"; + case UNW_MIPS_F27: + return "$f27"; + case UNW_MIPS_F28: + return "$f28"; + case UNW_MIPS_F29: + return "$f29"; + case UNW_MIPS_F30: + return "$f30"; + case UNW_MIPS_F31: + return "$f31"; + case UNW_MIPS_HI: + return "$hi"; + case UNW_MIPS_LO: + return "$lo"; + default: + return "unknown register"; + } +} +#endif // _LIBUNWIND_TARGET_MIPS_NEWABI + +#if defined(_LIBUNWIND_TARGET_SPARC) +/// Registers_sparc holds the register state of a thread in a 32-bit Sparc +/// process. 
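A pattern shared by the MIPS classes above and the SPARC class below: the generic UNW_REG_SP and UNW_REG_IP ids have no storage of their own, they resolve onto fixed slots of the architectural register file ($29 and __pc for MIPS, %o6 and %o7 for SPARC). A minimal sketch of that aliasing, using made-up id values rather than libunwind's constants:

#include <cstdint>

enum { SKETCH_REG_IP = -1, SKETCH_REG_SP = -2 };  // illustrative ids only

struct MiniMipsRegs {
  uint64_t r[32];
  uint64_t pc;

  uint64_t get(int num) const {
    if (num == SKETCH_REG_IP)
      return pc;
    if (num == SKETCH_REG_SP)
      return r[29];   // $29 is the ABI stack pointer, the same slot getSP() uses
    return r[num];
  }
};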
+class _LIBUNWIND_HIDDEN Registers_sparc { +public: + Registers_sparc(); + Registers_sparc(const void *registers); + + bool validRegister(int num) const; + uint32_t getRegister(int num) const; + void setRegister(int num, uint32_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_SPARC; } + static int getArch() { return REGISTERS_SPARC; } + + uint64_t getSP() const { return _registers.__regs[UNW_SPARC_O6]; } + void setSP(uint32_t value) { _registers.__regs[UNW_SPARC_O6] = value; } + uint64_t getIP() const { return _registers.__regs[UNW_SPARC_O7]; } + void setIP(uint32_t value) { _registers.__regs[UNW_SPARC_O7] = value; } + +private: + struct sparc_thread_state_t { + unsigned int __regs[32]; + }; + + sparc_thread_state_t _registers; +}; + +inline Registers_sparc::Registers_sparc(const void *registers) { + static_assert((check_fit::does_fit), + "sparc registers do not fit into unw_context_t"); + memcpy(&_registers, static_cast(registers), + sizeof(_registers)); +} + +inline Registers_sparc::Registers_sparc() { + memset(&_registers, 0, sizeof(_registers)); +} + +inline bool Registers_sparc::validRegister(int regNum) const { + if (regNum == UNW_REG_IP) + return true; + if (regNum == UNW_REG_SP) + return true; + if (regNum < 0) + return false; + if (regNum <= UNW_SPARC_I7) + return true; + return false; +} + +inline uint32_t Registers_sparc::getRegister(int regNum) const { + if ((UNW_SPARC_G0 <= regNum) && (regNum <= UNW_SPARC_I7)) { + return _registers.__regs[regNum]; + } + + switch (regNum) { + case UNW_REG_IP: + return _registers.__regs[UNW_SPARC_O7]; + case UNW_REG_SP: + return _registers.__regs[UNW_SPARC_O6]; + } + _LIBUNWIND_ABORT("unsupported sparc register"); +} + +inline void Registers_sparc::setRegister(int regNum, uint32_t value) { + if ((UNW_SPARC_G0 <= regNum) && (regNum <= UNW_SPARC_I7)) { + _registers.__regs[regNum] = value; + return; + } + + switch (regNum) { + case UNW_REG_IP: + _registers.__regs[UNW_SPARC_O7] = value; + return; + case UNW_REG_SP: + _registers.__regs[UNW_SPARC_O6] = value; + return; + } + _LIBUNWIND_ABORT("unsupported sparc register"); +} + +inline bool Registers_sparc::validFloatRegister(int) const { return false; } + +inline double Registers_sparc::getFloatRegister(int) const { + _LIBUNWIND_ABORT("no Sparc float registers"); +} + +inline void Registers_sparc::setFloatRegister(int, double) { + _LIBUNWIND_ABORT("no Sparc float registers"); +} + +inline bool Registers_sparc::validVectorRegister(int) const { return false; } + +inline v128 Registers_sparc::getVectorRegister(int) const { + _LIBUNWIND_ABORT("no Sparc vector registers"); +} + +inline void Registers_sparc::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("no Sparc vector registers"); +} + +inline const char *Registers_sparc::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "pc"; + case UNW_SPARC_G0: + return "g0"; + case UNW_SPARC_G1: + return "g1"; + case UNW_SPARC_G2: + return "g2"; + case UNW_SPARC_G3: + return "g3"; + case UNW_SPARC_G4: + return "g4"; + case UNW_SPARC_G5: + return "g5"; + case UNW_SPARC_G6: + return "g6"; + case UNW_SPARC_G7: + return "g7"; + case UNW_SPARC_O0: + return "o0"; + case UNW_SPARC_O1: + 
return "o1"; + case UNW_SPARC_O2: + return "o2"; + case UNW_SPARC_O3: + return "o3"; + case UNW_SPARC_O4: + return "o4"; + case UNW_SPARC_O5: + return "o5"; + case UNW_REG_SP: + case UNW_SPARC_O6: + return "sp"; + case UNW_SPARC_O7: + return "o7"; + case UNW_SPARC_L0: + return "l0"; + case UNW_SPARC_L1: + return "l1"; + case UNW_SPARC_L2: + return "l2"; + case UNW_SPARC_L3: + return "l3"; + case UNW_SPARC_L4: + return "l4"; + case UNW_SPARC_L5: + return "l5"; + case UNW_SPARC_L6: + return "l6"; + case UNW_SPARC_L7: + return "l7"; + case UNW_SPARC_I0: + return "i0"; + case UNW_SPARC_I1: + return "i1"; + case UNW_SPARC_I2: + return "i2"; + case UNW_SPARC_I3: + return "i3"; + case UNW_SPARC_I4: + return "i4"; + case UNW_SPARC_I5: + return "i5"; + case UNW_SPARC_I6: + return "fp"; + case UNW_SPARC_I7: + return "i7"; + default: + return "unknown register"; + } +} +#endif // _LIBUNWIND_TARGET_SPARC + +} // namespace libunwind + +#endif // __REGISTERS_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.cpp b/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.cpp new file mode 100644 index 0000000000000..3e0bb0c9de825 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.cpp @@ -0,0 +1,992 @@ +//===--------------------------- Unwind-EHABI.cpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Implements ARM zero-cost C++ exceptions +// +//===----------------------------------------------------------------------===// + +#include "Unwind-EHABI.h" + +#if defined(_LIBUNWIND_ARM_EHABI) + +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "libunwind.h" +#include "libunwind_ext.h" +#include "unwind.h" + +namespace { + +// Strange order: take words in order, but inside word, take from most to least +// signinficant byte. +uint8_t getByte(const uint32_t* data, size_t offset) { + const uint8_t* byteData = reinterpret_cast(data); + return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))]; +} + +const char* getNextWord(const char* data, uint32_t* out) { + *out = *reinterpret_cast(data); + return data + 4; +} + +const char* getNextNibble(const char* data, uint32_t* out) { + *out = *reinterpret_cast(data); + return data + 2; +} + +struct Descriptor { + // See # 9.2 + typedef enum { + SU16 = 0, // Short descriptor, 16-bit entries + LU16 = 1, // Long descriptor, 16-bit entries + LU32 = 3, // Long descriptor, 32-bit entries + RESERVED0 = 4, RESERVED1 = 5, RESERVED2 = 6, RESERVED3 = 7, + RESERVED4 = 8, RESERVED5 = 9, RESERVED6 = 10, RESERVED7 = 11, + RESERVED8 = 12, RESERVED9 = 13, RESERVED10 = 14, RESERVED11 = 15 + } Format; + + // See # 9.2 + typedef enum { + CLEANUP = 0x0, + FUNC = 0x1, + CATCH = 0x2, + INVALID = 0x4 + } Kind; +}; + +_Unwind_Reason_Code ProcessDescriptors( + _Unwind_State state, + _Unwind_Control_Block* ucbp, + struct _Unwind_Context* context, + Descriptor::Format format, + const char* descriptorStart, + uint32_t flags) { + + // EHT is inlined in the index using compact form. No descriptors. #5 + if (flags & 0x1) + return _URC_CONTINUE_UNWIND; + + // TODO: We should check the state here, and determine whether we need to + // perform phase1 or phase2 unwinding. 
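+  // (Editor's note: in EHABI terms, _US_VIRTUAL_UNWIND_FRAME identifies the
+  // phase-1 search, while _US_UNWIND_FRAME_STARTING/_US_UNWIND_FRAME_RESUME
+  // identify phase-2 cleanup; that is the dispatch the TODO above refers to.)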
+ (void)state; + + const char* descriptor = descriptorStart; + uint32_t descriptorWord; + getNextWord(descriptor, &descriptorWord); + while (descriptorWord) { + // Read descriptor based on # 9.2. + uint32_t length; + uint32_t offset; + switch (format) { + case Descriptor::LU32: + descriptor = getNextWord(descriptor, &length); + descriptor = getNextWord(descriptor, &offset); + case Descriptor::LU16: + descriptor = getNextNibble(descriptor, &length); + descriptor = getNextNibble(descriptor, &offset); + default: + assert(false); + return _URC_FAILURE; + } + + // See # 9.2 table for decoding the kind of descriptor. It's a 2-bit value. + Descriptor::Kind kind = + static_cast((length & 0x1) | ((offset & 0x1) << 1)); + + // Clear off flag from last bit. + length &= ~1u; + offset &= ~1u; + uintptr_t scopeStart = ucbp->pr_cache.fnstart + offset; + uintptr_t scopeEnd = scopeStart + length; + uintptr_t pc = _Unwind_GetIP(context); + bool isInScope = (scopeStart <= pc) && (pc < scopeEnd); + + switch (kind) { + case Descriptor::CLEANUP: { + // TODO(ajwong): Handle cleanup descriptors. + break; + } + case Descriptor::FUNC: { + // TODO(ajwong): Handle function descriptors. + break; + } + case Descriptor::CATCH: { + // Catch descriptors require gobbling one more word. + uint32_t landing_pad; + descriptor = getNextWord(descriptor, &landing_pad); + + if (isInScope) { + // TODO(ajwong): This is only phase1 compatible logic. Implement + // phase2. + landing_pad = signExtendPrel31(landing_pad & ~0x80000000); + if (landing_pad == 0xffffffff) { + return _URC_HANDLER_FOUND; + } else if (landing_pad == 0xfffffffe) { + return _URC_FAILURE; + } else { + /* + bool is_reference_type = landing_pad & 0x80000000; + void* matched_object; + if (__cxxabiv1::__cxa_type_match( + ucbp, reinterpret_cast(landing_pad), + is_reference_type, + &matched_object) != __cxxabiv1::ctm_failed) + return _URC_HANDLER_FOUND; + */ + _LIBUNWIND_ABORT("Type matching not implemented"); + } + } + break; + } + default: + _LIBUNWIND_ABORT("Invalid descriptor kind found."); + } + + getNextWord(descriptor, &descriptorWord); + } + + return _URC_CONTINUE_UNWIND; +} + +static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state, + _Unwind_Control_Block* ucbp, + struct _Unwind_Context* context) { + // Read the compact model EHT entry's header # 6.3 + const uint32_t* unwindingData = ucbp->pr_cache.ehtp; + assert((*unwindingData & 0xf0000000) == 0x80000000 && "Must be a compact entry"); + Descriptor::Format format = + static_cast((*unwindingData & 0x0f000000) >> 24); + + const char *lsda = + reinterpret_cast(_Unwind_GetLanguageSpecificData(context)); + + // Handle descriptors before unwinding so they are processed in the context + // of the correct stack frame. + _Unwind_Reason_Code result = + ProcessDescriptors(state, ucbp, context, format, lsda, + ucbp->pr_cache.additional); + + if (result != _URC_CONTINUE_UNWIND) + return result; + + if (__unw_step(reinterpret_cast(context)) != UNW_STEP_SUCCESS) + return _URC_FAILURE; + return _URC_CONTINUE_UNWIND; +} + +// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE / +// _UVRSD_UINT32. +uint32_t RegisterMask(uint8_t start, uint8_t count_minus_one) { + return ((1U << (count_minus_one + 1)) - 1) << start; +} + +// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_VFP / +// _UVRSD_DOUBLE. 
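+// (Editor's note, worked examples: RegisterMask(4, 3) == 0x000000f0, selecting
+// r4-r7; RegisterRange(8, 2) == 0x00080003, i.e. first register 8, count 3,
+// selecting d8-d10.)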
+uint32_t RegisterRange(uint8_t start, uint8_t count_minus_one) { + return ((uint32_t)start << 16) | ((uint32_t)count_minus_one + 1); +} + +} // end anonymous namespace + +/** + * Decodes an EHT entry. + * + * @param data Pointer to EHT. + * @param[out] off Offset from return value (in bytes) to begin interpretation. + * @param[out] len Number of bytes in unwind code. + * @return Pointer to beginning of unwind code. + */ +extern "C" const uint32_t* +decode_eht_entry(const uint32_t* data, size_t* off, size_t* len) { + if ((*data & 0x80000000) == 0) { + // 6.2: Generic Model + // + // EHT entry is a prel31 pointing to the PR, followed by data understood + // only by the personality routine. Fortunately, all existing assembler + // implementations, including GNU assembler, LLVM integrated assembler, + // and ARM assembler, assume that the unwind opcodes come after the + // personality routine address. + *off = 1; // First byte is size data. + *len = (((data[1] >> 24) & 0xff) + 1) * 4; + data++; // Skip the first word, which is the prel31 offset. + } else { + // 6.3: ARM Compact Model + // + // EHT entries here correspond to the __aeabi_unwind_cpp_pr[012] PRs indeded + // by format: + Descriptor::Format format = + static_cast((*data & 0x0f000000) >> 24); + switch (format) { + case Descriptor::SU16: + *len = 4; + *off = 1; + break; + case Descriptor::LU16: + case Descriptor::LU32: + *len = 4 + 4 * ((*data & 0x00ff0000) >> 16); + *off = 2; + break; + default: + return nullptr; + } + } + return data; +} + +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_VRS_Interpret(_Unwind_Context *context, const uint32_t *data, + size_t offset, size_t len) { + bool wrotePC = false; + bool finish = false; + while (offset < len && !finish) { + uint8_t byte = getByte(data, offset++); + if ((byte & 0x80) == 0) { + uint32_t sp; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); + if (byte & 0x40) + sp -= (((uint32_t)byte & 0x3f) << 2) + 4; + else + sp += ((uint32_t)byte << 2) + 4; + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp, NULL); + } else { + switch (byte & 0xf0) { + case 0x80: { + if (offset >= len) + return _URC_FAILURE; + uint32_t registers = + (((uint32_t)byte & 0x0f) << 12) | + (((uint32_t)getByte(data, offset++)) << 4); + if (!registers) + return _URC_FAILURE; + if (registers & (1 << 15)) + wrotePC = true; + _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); + break; + } + case 0x90: { + uint8_t reg = byte & 0x0f; + if (reg == 13 || reg == 15) + return _URC_FAILURE; + uint32_t sp; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_R0 + reg, + _UVRSD_UINT32, &sp); + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp, NULL); + break; + } + case 0xa0: { + uint32_t registers = RegisterMask(4, byte & 0x07); + if (byte & 0x08) + registers |= 1 << 14; + _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); + break; + } + case 0xb0: { + switch (byte) { + case 0xb0: + finish = true; + break; + case 0xb1: { + if (offset >= len) + return _URC_FAILURE; + uint8_t registers = getByte(data, offset++); + if (registers & 0xf0 || !registers) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); + break; + } + case 0xb2: { + uint32_t addend = 0; + uint32_t shift = 0; + // This decodes a uleb128 value. 
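+          // (Editor's note: ULEB128 stores 7 bits per byte, least-significant
+          // group first, with the top bit set on every byte but the last; for
+          // example the bytes 0xE5 0x8E 0x26 decode to 624485. The resulting
+          // adjustment below is vsp += 0x204 + (uleb128 << 2), per opcode 0xb2.)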
+ while (true) { + if (offset >= len) + return _URC_FAILURE; + uint32_t v = getByte(data, offset++); + addend |= (v & 0x7f) << shift; + if ((v & 0x80) == 0) + break; + shift += 7; + } + uint32_t sp; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp); + sp += 0x204 + (addend << 2); + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp, NULL); + break; + } + case 0xb3: { + uint8_t v = getByte(data, offset++); + _Unwind_VRS_Pop(context, _UVRSC_VFP, + RegisterRange(static_cast(v >> 4), + v & 0x0f), _UVRSD_VFPX); + break; + } + case 0xb4: + case 0xb5: + case 0xb6: + case 0xb7: + return _URC_FAILURE; + default: + _Unwind_VRS_Pop(context, _UVRSC_VFP, + RegisterRange(8, byte & 0x07), _UVRSD_VFPX); + break; + } + break; + } + case 0xc0: { + switch (byte) { +#if defined(__ARM_WMMX) + case 0xc0: + case 0xc1: + case 0xc2: + case 0xc3: + case 0xc4: + case 0xc5: + _Unwind_VRS_Pop(context, _UVRSC_WMMXD, + RegisterRange(10, byte & 0x7), _UVRSD_DOUBLE); + break; + case 0xc6: { + uint8_t v = getByte(data, offset++); + uint8_t start = static_cast(v >> 4); + uint8_t count_minus_one = v & 0xf; + if (start + count_minus_one >= 16) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_WMMXD, + RegisterRange(start, count_minus_one), + _UVRSD_DOUBLE); + break; + } + case 0xc7: { + uint8_t v = getByte(data, offset++); + if (!v || v & 0xf0) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_WMMXC, v, _UVRSD_DOUBLE); + break; + } +#endif + case 0xc8: + case 0xc9: { + uint8_t v = getByte(data, offset++); + uint8_t start = + static_cast(((byte == 0xc8) ? 16 : 0) + (v >> 4)); + uint8_t count_minus_one = v & 0xf; + if (start + count_minus_one >= 32) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_VFP, + RegisterRange(start, count_minus_one), + _UVRSD_DOUBLE); + break; + } + default: + return _URC_FAILURE; + } + break; + } + case 0xd0: { + if (byte & 0x08) + return _URC_FAILURE; + _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x7), + _UVRSD_DOUBLE); + break; + } + default: + return _URC_FAILURE; + } + } + } + if (!wrotePC) { + uint32_t lr; + _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_LR, _UVRSD_UINT32, &lr); + _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_IP, _UVRSD_UINT32, &lr, NULL); + } + return _URC_CONTINUE_UNWIND; +} + +extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code +__aeabi_unwind_cpp_pr0(_Unwind_State state, _Unwind_Control_Block *ucbp, + _Unwind_Context *context) { + return unwindOneFrame(state, ucbp, context); +} + +extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code +__aeabi_unwind_cpp_pr1(_Unwind_State state, _Unwind_Control_Block *ucbp, + _Unwind_Context *context) { + return unwindOneFrame(state, ucbp, context); +} + +extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code +__aeabi_unwind_cpp_pr2(_Unwind_State state, _Unwind_Control_Block *ucbp, + _Unwind_Context *context) { + return unwindOneFrame(state, ucbp, context); +} + +static _Unwind_Reason_Code +unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { + // EHABI #7.3 discusses preserving the VRS in a "temporary VRS" during + // phase 1 and then restoring it to the "primary VRS" for phase 2. The + // effect is phase 2 doesn't see any of the VRS manipulations from phase 1. + // In this implementation, the phases don't share the VRS backing store. + // Instead, they are passed the original |uc| and they create a new VRS + // from scratch thus achieving the same effect. 
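+  // (Editor's note: __unw_init_local below rebuilds the cursor from the
+  // context captured by __unw_getcontext in _Unwind_RaiseException, so nothing
+  // the search phase does to its cursor is visible to the cleanup phase.)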
+ __unw_init_local(cursor, uc); + + // Walk each frame looking for a place to stop. + for (bool handlerNotFound = true; handlerNotFound;) { + + // See if frame has code to run (has personality routine). + unw_proc_info_t frameInfo; + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE1_ERROR", + static_cast(exception_object)); + return _URC_FATAL_PHASE1_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + unw_word_t pc; + __unw_get_reg(cursor, UNW_REG_IP, &pc); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR ", func=%s, " + "lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR, + static_cast(exception_object), pc, + frameInfo.start_ip, functionName, + frameInfo.lsda, frameInfo.handler); + } + + // If there is a personality routine, ask it if it will want to stop at + // this frame. + if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(long)(frameInfo.handler); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): calling personality function %p", + static_cast(exception_object), + reinterpret_cast(reinterpret_cast(p))); + struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor); + exception_object->pr_cache.fnstart = frameInfo.start_ip; + exception_object->pr_cache.ehtp = + (_Unwind_EHT_Header *)frameInfo.unwind_info; + exception_object->pr_cache.additional = frameInfo.flags; + _Unwind_Reason_Code personalityResult = + (*p)(_US_VIRTUAL_UNWIND_FRAME, exception_object, context); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): personality result %d start_ip %x ehtp %p " + "additional %x", + static_cast(exception_object), personalityResult, + exception_object->pr_cache.fnstart, + static_cast(exception_object->pr_cache.ehtp), + exception_object->pr_cache.additional); + switch (personalityResult) { + case _URC_HANDLER_FOUND: + // found a catch clause or locals that need destructing in this frame + // stop search and remember stack pointer at the frame + handlerNotFound = false; + // p should have initialized barrier_cache. EHABI #7.3.5 + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND", + static_cast(exception_object)); + return _URC_NO_REASON; + + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND", + static_cast(exception_object)); + // continue unwinding + break; + + // EHABI #7.3.3 + case _URC_FAILURE: + return _URC_FAILURE; + + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR", + static_cast(exception_object)); + return _URC_FATAL_PHASE1_ERROR; + } + } + } + return _URC_NO_REASON; +} + +static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, + _Unwind_Exception *exception_object, + bool resume) { + // See comment at the start of unwind_phase1 regarding VRS integrity. + __unw_init_local(cursor, uc); + + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", + static_cast(exception_object)); + int frame_count = 0; + + // Walk each frame until we reach where search phase said to stop. 
+ while (true) { + // Ask libunwind to get next frame (skip over first which is + // _Unwind_RaiseException or _Unwind_Resume). + // + // Resume only ever makes sense for 1 frame. + _Unwind_State state = + resume ? _US_UNWIND_FRAME_RESUME : _US_UNWIND_FRAME_STARTING; + if (resume && frame_count == 1) { + // On a resume, first unwind the _Unwind_Resume() frame. The next frame + // is now the landing pad for the cleanup from a previous execution of + // phase2. To continue unwindingly correctly, replace VRS[15] with the + // IP of the frame that the previous run of phase2 installed the context + // for. After this, continue unwinding as if normal. + // + // See #7.4.6 for details. + __unw_set_reg(cursor, UNW_REG_IP, + exception_object->unwinder_cache.reserved2, NULL); + resume = false; + } + + // Get info about this frame. + unw_word_t sp; + unw_proc_info_t frameInfo; + __unw_get_reg(cursor, UNW_REG_SP, &sp); + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE2_ERROR", + static_cast(exception_object)); + return _URC_FATAL_PHASE2_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR ", func=%s, sp=0x%" PRIxPTR ", " + "lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "", + static_cast(exception_object), frameInfo.start_ip, + functionName, sp, frameInfo.lsda, + frameInfo.handler); + } + + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(long)(frameInfo.handler); + struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor); + // EHABI #7.2 + exception_object->pr_cache.fnstart = frameInfo.start_ip; + exception_object->pr_cache.ehtp = + (_Unwind_EHT_Header *)frameInfo.unwind_info; + exception_object->pr_cache.additional = frameInfo.flags; + _Unwind_Reason_Code personalityResult = + (*p)(state, exception_object, context); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + // Continue unwinding + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND", + static_cast(exception_object)); + // EHABI #7.2 + if (sp == exception_object->barrier_cache.sp) { + // Phase 1 said we would stop at this frame, but we did not... + _LIBUNWIND_ABORT("during phase1 personality function said it would " + "stop here, but now in phase2 it did not stop here"); + } + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT", + static_cast(exception_object)); + // Personality routine says to transfer control to landing pad. + // We may get control back if landing pad calls _Unwind_Resume(). + if (_LIBUNWIND_TRACING_UNWINDING) { + unw_word_t pc; + __unw_get_reg(cursor, UNW_REG_IP, &pc); + __unw_get_reg(cursor, UNW_REG_SP, &sp); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " + "user code with ip=0x%" PRIxPTR ", sp=0x%" PRIxPTR, + static_cast(exception_object), + pc, sp); + } + + { + // EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume + // is called back, to find this same frame. 
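+          // (Editor's note: the value saved into unwinder_cache.reserved2 here
+          // is read back by the resume branch at the top of this loop.)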
+ unw_word_t pc; + __unw_get_reg(cursor, UNW_REG_IP, &pc); + exception_object->unwinder_cache.reserved2 = (uint32_t)pc; + } + __unw_resume(cursor); + // __unw_resume() only returns if there was an error. + return _URC_FATAL_PHASE2_ERROR; + + // # EHABI #7.4.3 + case _URC_FAILURE: + abort(); + + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", + personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + frame_count++; + } + + // Clean up phase did not resume at the frame that the search phase + // said it would... + return _URC_FATAL_PHASE2_ERROR; +} + +/// Called by __cxa_throw. Only returns if there is a fatal error. +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_RaiseException(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)", + static_cast(exception_object)); + unw_context_t uc; + unw_cursor_t cursor; + __unw_getcontext(&uc); + + // This field for is for compatibility with GCC to say this isn't a forced + // unwind. EHABI #7.2 + exception_object->unwinder_cache.reserved1 = 0; + + // phase 1: the search phase + _Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object); + if (phase1 != _URC_NO_REASON) + return phase1; + + // phase 2: the clean up phase + return unwind_phase2(&uc, &cursor, exception_object, false); +} + +_LIBUNWIND_EXPORT void _Unwind_Complete(_Unwind_Exception* exception_object) { + // This is to be called when exception handling completes to give us a chance + // to perform any housekeeping. EHABI #7.2. But we have nothing to do here. + (void)exception_object; +} + +/// When _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function, the landing +/// pad code may then call _Unwind_Resume() to continue with the +/// unwinding. Note: the call to _Unwind_Resume() is from compiler +/// generated user code. All other _Unwind_* routines are called +/// by the C++ runtime __cxa_* routines. +/// +/// Note: re-throwing an exception (as opposed to continuing the unwind) +/// is implemented by having the code call __cxa_rethrow() which +/// in turn calls _Unwind_Resume_or_Rethrow(). +_LIBUNWIND_EXPORT void +_Unwind_Resume(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)", + static_cast(exception_object)); + unw_context_t uc; + unw_cursor_t cursor; + __unw_getcontext(&uc); + + // _Unwind_RaiseException on EHABI will always set the reserved1 field to 0, + // which is in the same position as private_1 below. + // TODO(ajwong): Who wronte the above? Why is it true? + unwind_phase2(&uc, &cursor, exception_object, true); + + // Clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); +} + +/// Called by personality handler during phase 2 to get LSDA for current frame. 
+_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + result = (uintptr_t)frameInfo.lsda; + _LIBUNWIND_TRACE_API( + "_Unwind_GetLanguageSpecificData(context=%p) => 0x%llx", + static_cast(context), (long long)result); + return result; +} + +static uint64_t ValueAsBitPattern(_Unwind_VRS_DataRepresentation representation, + void* valuep) { + uint64_t value = 0; + switch (representation) { + case _UVRSD_UINT32: + case _UVRSD_FLOAT: + memcpy(&value, valuep, sizeof(uint32_t)); + break; + + case _UVRSD_VFPX: + case _UVRSD_UINT64: + case _UVRSD_DOUBLE: + memcpy(&value, valuep, sizeof(uint64_t)); + break; + } + return value; +} + +_LIBUNWIND_EXPORT _Unwind_VRS_Result +_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep, unw_word_t *pos) { + _LIBUNWIND_TRACE_API("_Unwind_VRS_Set(context=%p, regclass=%d, reg=%d, " + "rep=%d, value=0x%llX)", + static_cast(context), regclass, regno, + representation, + ValueAsBitPattern(representation, valuep)); + unw_cursor_t *cursor = (unw_cursor_t *)context; + switch (regclass) { + case _UVRSC_CORE: + if (representation != _UVRSD_UINT32 || regno > 15) + return _UVRSR_FAILED; + return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), + *(unw_word_t *)valuep,(unw_word_t *)pos) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; + case _UVRSC_VFP: + if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) + return _UVRSR_FAILED; + if (representation == _UVRSD_VFPX) { + // Can only touch d0-15 with FSTMFDX. + if (regno > 15) + return _UVRSR_FAILED; + __unw_save_vfp_as_X(cursor); + } else { + if (regno > 31) + return _UVRSR_FAILED; + } + return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), + *(unw_fpreg_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; +#if defined(__ARM_WMMX) + case _UVRSC_WMMXC: + if (representation != _UVRSD_UINT32 || regno > 3) + return _UVRSR_FAILED; + return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), + *(unw_word_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; + case _UVRSC_WMMXD: + if (representation != _UVRSD_DOUBLE || regno > 31) + return _UVRSR_FAILED; + return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), + *(unw_fpreg_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; +#else + case _UVRSC_WMMXC: + case _UVRSC_WMMXD: + break; +#endif + } + _LIBUNWIND_ABORT("unsupported register class"); +} + +static _Unwind_VRS_Result +_Unwind_VRS_Get_Internal(_Unwind_Context *context, + _Unwind_VRS_RegClass regclass, uint32_t regno, + _Unwind_VRS_DataRepresentation representation, + void *valuep) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + switch (regclass) { + case _UVRSC_CORE: + if (representation != _UVRSD_UINT32 || regno > 15) + return _UVRSR_FAILED; + return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), + (unw_word_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; + case _UVRSC_VFP: + if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) + return _UVRSR_FAILED; + if (representation == _UVRSD_VFPX) { + // Can only touch d0-15 with FSTMFDX. 
+ if (regno > 15) + return _UVRSR_FAILED; + __unw_save_vfp_as_X(cursor); + } else { + if (regno > 31) + return _UVRSR_FAILED; + } + return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), + (unw_fpreg_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; +#if defined(__ARM_WMMX) + case _UVRSC_WMMXC: + if (representation != _UVRSD_UINT32 || regno > 3) + return _UVRSR_FAILED; + return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), + (unw_word_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; + case _UVRSC_WMMXD: + if (representation != _UVRSD_DOUBLE || regno > 31) + return _UVRSR_FAILED; + return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), + (unw_fpreg_t *)valuep) == UNW_ESUCCESS + ? _UVRSR_OK + : _UVRSR_FAILED; +#else + case _UVRSC_WMMXC: + case _UVRSC_WMMXD: + break; +#endif + } + _LIBUNWIND_ABORT("unsupported register class"); +} + +_LIBUNWIND_EXPORT _Unwind_VRS_Result +_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t regno, _Unwind_VRS_DataRepresentation representation, + void *valuep) { + _Unwind_VRS_Result result = + _Unwind_VRS_Get_Internal(context, regclass, regno, representation, + valuep); + _LIBUNWIND_TRACE_API("_Unwind_VRS_Get(context=%p, regclass=%d, reg=%d, " + "rep=%d, value=0x%llX, result = %d)", + static_cast(context), regclass, regno, + representation, + ValueAsBitPattern(representation, valuep), result); + return result; +} + +_Unwind_VRS_Result +_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, + uint32_t discriminator, + _Unwind_VRS_DataRepresentation representation) { + _LIBUNWIND_TRACE_API("_Unwind_VRS_Pop(context=%p, regclass=%d, " + "discriminator=%d, representation=%d)", + static_cast(context), regclass, discriminator, + representation); + switch (regclass) { + case _UVRSC_WMMXC: +#if !defined(__ARM_WMMX) + break; +#endif + case _UVRSC_CORE: { + if (representation != _UVRSD_UINT32) + return _UVRSR_FAILED; + // When popping SP from the stack, we don't want to override it from the + // computed new stack location. See EHABI #7.5.4 table 3. + bool poppedSP = false; + uint32_t* sp; + uint32_t* pos; + if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, + _UVRSD_UINT32, &sp) != _UVRSR_OK) { + return _UVRSR_FAILED; + } + for (uint32_t i = 0; i < 16; ++i) { + if (!(discriminator & static_cast(1 << i))) + continue; + pos = sp; + uint32_t value = *sp++; + if (regclass == _UVRSC_CORE && i == 13) + poppedSP = true; + if (_Unwind_VRS_Set(context, regclass, i, + _UVRSD_UINT32, &value, pos) != _UVRSR_OK) { + return _UVRSR_FAILED; + } + } + if (!poppedSP) { + return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, + _UVRSD_UINT32, &sp, NULL); + } + return _UVRSR_OK; + } + case _UVRSC_WMMXD: +#if !defined(__ARM_WMMX) + break; +#endif + case _UVRSC_VFP: { + if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) + return _UVRSR_FAILED; + uint32_t first = discriminator >> 16; + uint32_t count = discriminator & 0xffff; + uint32_t end = first+count; + uint32_t* sp; + if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, + _UVRSD_UINT32, &sp) != _UVRSR_OK) { + return _UVRSR_FAILED; + } + // For _UVRSD_VFPX, we're assuming the data is stored in FSTMX "standard + // format 1", which is equivalent to FSTMD + a padding word. + for (uint32_t i = first; i < end; ++i) { + // SP is only 32-bit aligned so don't copy 64-bit at a time. 
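+        // (Editor's note: each register is rebuilt from two 32-bit loads, low
+        // word first; the ++sp after this loop skips the FSTMX padding word
+        // mentioned above when the representation is _UVRSD_VFPX.)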
+ uint64_t value = *sp++; + value |= ((uint64_t)(*sp++)) << 32; + if (_Unwind_VRS_Set(context, regclass, i, representation, &value, NULL) != + _UVRSR_OK) + return _UVRSR_FAILED; + } + if (representation == _UVRSD_VFPX) + ++sp; + return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, + &sp, NULL); + } + } + _LIBUNWIND_ABORT("unsupported register class"); +} + +/// Called by personality handler during phase 2 to find the start of the +/// function. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + result = (uintptr_t)frameInfo.start_ip; + _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%llX", + static_cast(context), (long long)result); + return result; +} + + +/// Called by personality handler during phase 2 if a foreign exception +// is caught. +_LIBUNWIND_EXPORT void +_Unwind_DeleteException(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)", + static_cast(exception_object)); + if (exception_object->exception_cleanup != NULL) + (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, + exception_object); +} + +extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code +__gnu_unwind_frame(_Unwind_Exception *exception_object, + struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + if (__unw_step(cursor) != UNW_STEP_SUCCESS) + return _URC_FAILURE; + return _URC_OK; +} + +#endif // defined(_LIBUNWIND_ARM_EHABI) diff --git a/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.h b/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.h new file mode 100644 index 0000000000000..6897082a337f3 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Unwind-EHABI.h @@ -0,0 +1,50 @@ +//===------------------------- Unwind-EHABI.hpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +//===----------------------------------------------------------------------===// + +#ifndef __UNWIND_EHABI_H__ +#define __UNWIND_EHABI_H__ + +#include <__libunwind_config.h> + +#if defined(_LIBUNWIND_ARM_EHABI) + +#include +#include + +// Unable to unwind in the ARM index table (section 5 EHABI). 
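+// (Editor's note: per EHABI section 5, an index-table entry whose second word
+// holds this value marks a function that must not be unwound through.)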
+#define UNW_EXIDX_CANTUNWIND 0x1 + +static inline uint32_t signExtendPrel31(uint32_t data) { + return data | ((data & 0x40000000u) << 1); +} + +static inline uint32_t readPrel31(const uint32_t *data) { + return (((uint32_t)(uintptr_t)data) + signExtendPrel31(*data)); +} + +#if defined(__cplusplus) +extern "C" { +#endif + +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr0( + _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); + +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr1( + _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); + +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr2( + _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context); + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // defined(_LIBUNWIND_ARM_EHABI) + +#endif // __UNWIND_EHABI_H__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/Unwind-seh.cpp b/src/coreclr/src/nativeaot/libunwind/src/Unwind-seh.cpp new file mode 100644 index 0000000000000..7647f2e0db0bf --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Unwind-seh.cpp @@ -0,0 +1,501 @@ +//===--------------------------- Unwind-seh.cpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements SEH-based Itanium C++ exceptions. +// +//===----------------------------------------------------------------------===// + +#include "config.h" + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "libunwind_ext.h" +#include "UnwindCursor.hpp" + +using namespace libunwind; + +#define STATUS_USER_DEFINED (1u << 29) + +#define STATUS_GCC_MAGIC (('G' << 16) | ('C' << 8) | 'C') + +#define MAKE_CUSTOM_STATUS(s, c) \ + ((NTSTATUS)(((s) << 30) | STATUS_USER_DEFINED | (c))) +#define MAKE_GCC_EXCEPTION(c) \ + MAKE_CUSTOM_STATUS(STATUS_SEVERITY_SUCCESS, STATUS_GCC_MAGIC | ((c) << 24)) + +/// SEH exception raised by libunwind when the program calls +/// \c _Unwind_RaiseException. +#define STATUS_GCC_THROW MAKE_GCC_EXCEPTION(0) // 0x20474343 +/// SEH exception raised by libunwind to initiate phase 2 of exception +/// handling. +#define STATUS_GCC_UNWIND MAKE_GCC_EXCEPTION(1) // 0x21474343 + +/// Class of foreign exceptions based on unrecognized SEH exceptions. +static const uint64_t kSEHExceptionClass = 0x434C4E4753454800; // CLNGSEH\0 + +/// Exception cleanup routine used by \c _GCC_specific_handler to +/// free foreign exceptions. +static void seh_exc_cleanup(_Unwind_Reason_Code urc, _Unwind_Exception *exc) { + (void)urc; + if (exc->exception_class != kSEHExceptionClass) + _LIBUNWIND_ABORT("SEH cleanup called on non-SEH exception"); + free(exc); +} + +static int __unw_init_seh(unw_cursor_t *cursor, CONTEXT *ctx); +static DISPATCHER_CONTEXT *__unw_seh_get_disp_ctx(unw_cursor_t *cursor); +static void __unw_seh_set_disp_ctx(unw_cursor_t *cursor, + DISPATCHER_CONTEXT *disp); + +/// Common implementation of SEH-style handler functions used by Itanium- +/// style frames. Depending on how and why it was called, it may do one of: +/// a) Delegate to the given Itanium-style personality function; or +/// b) Initiate a collided unwind to halt unwinding. 
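+/// (Editor's note: STATUS_GCC_THROW tags exceptions raised by this library,
+/// while STATUS_GCC_UNWIND tags the collided unwind used to reach the chosen
+/// landing pad; the two early branches below separate exactly these cases.)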
+_LIBUNWIND_EXPORT EXCEPTION_DISPOSITION +_GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx, + DISPATCHER_CONTEXT *disp, __personality_routine pers) { + unw_cursor_t cursor; + _Unwind_Exception *exc; + _Unwind_Action action; + struct _Unwind_Context *ctx = nullptr; + _Unwind_Reason_Code urc; + uintptr_t retval, target; + bool ours = false; + + _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler(%#010lx(%lx), %p)", + ms_exc->ExceptionCode, ms_exc->ExceptionFlags, + (void *)frame); + if (ms_exc->ExceptionCode == STATUS_GCC_UNWIND) { + if (IS_TARGET_UNWIND(ms_exc->ExceptionFlags)) { + // Set up the upper return value (the lower one and the target PC + // were set in the call to RtlUnwindEx()) for the landing pad. +#ifdef __x86_64__ + disp->ContextRecord->Rdx = ms_exc->ExceptionInformation[3]; +#elif defined(__arm__) + disp->ContextRecord->R1 = ms_exc->ExceptionInformation[3]; +#elif defined(__aarch64__) + disp->ContextRecord->X1 = ms_exc->ExceptionInformation[3]; +#endif + } + // This is the collided unwind to the landing pad. Nothing to do. + return ExceptionContinueSearch; + } + + if (ms_exc->ExceptionCode == STATUS_GCC_THROW) { + // This is (probably) a libunwind-controlled exception/unwind. Recover the + // parameters which we set below, and pass them to the personality function. + ours = true; + exc = (_Unwind_Exception *)ms_exc->ExceptionInformation[0]; + if (!IS_UNWINDING(ms_exc->ExceptionFlags) && ms_exc->NumberParameters > 1) { + ctx = (struct _Unwind_Context *)ms_exc->ExceptionInformation[1]; + action = (_Unwind_Action)ms_exc->ExceptionInformation[2]; + } + } else { + // Foreign exception. + exc = (_Unwind_Exception *)malloc(sizeof(_Unwind_Exception)); + exc->exception_class = kSEHExceptionClass; + exc->exception_cleanup = seh_exc_cleanup; + memset(exc->private_, 0, sizeof(exc->private_)); + } + if (!ctx) { + __unw_init_seh(&cursor, disp->ContextRecord); + __unw_seh_set_disp_ctx(&cursor, disp); + __unw_set_reg(&cursor, UNW_REG_IP, disp->ControlPc - 1); + ctx = (struct _Unwind_Context *)&cursor; + + if (!IS_UNWINDING(ms_exc->ExceptionFlags)) { + if (ours && ms_exc->NumberParameters > 1) + action = (_Unwind_Action)(_UA_CLEANUP_PHASE | _UA_FORCE_UNWIND); + else + action = _UA_SEARCH_PHASE; + } else { + if (ours && ms_exc->ExceptionInformation[1] == (ULONG_PTR)frame) + action = (_Unwind_Action)(_UA_CLEANUP_PHASE | _UA_HANDLER_FRAME); + else + action = _UA_CLEANUP_PHASE; + } + } + + _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler() calling personality " + "function %p(1, %d, %llx, %p, %p)", + (void *)pers, action, exc->exception_class, + (void *)exc, (void *)ctx); + urc = pers(1, action, exc->exception_class, exc, ctx); + _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler() personality returned %d", urc); + switch (urc) { + case _URC_CONTINUE_UNWIND: + // If we're in phase 2, and the personality routine said to continue + // at the target frame, we're in real trouble. + if (action & _UA_HANDLER_FRAME) + _LIBUNWIND_ABORT("Personality continued unwind at the target frame!"); + return ExceptionContinueSearch; + case _URC_HANDLER_FOUND: + // If we were called by __libunwind_seh_personality(), indicate that + // a handler was found; otherwise, initiate phase 2 by unwinding. + if (ours && ms_exc->NumberParameters > 1) + return 4 /* ExecptionExecuteHandler in mingw */; + // This should never happen in phase 2. 
+ if (IS_UNWINDING(ms_exc->ExceptionFlags)) + _LIBUNWIND_ABORT("Personality indicated exception handler in phase 2!"); + exc->private_[1] = (ULONG_PTR)frame; + if (ours) { + ms_exc->NumberParameters = 4; + ms_exc->ExceptionInformation[1] = (ULONG_PTR)frame; + } + // FIXME: Indicate target frame in foreign case! + // phase 2: the clean up phase + RtlUnwindEx(frame, (PVOID)disp->ControlPc, ms_exc, exc, ms_ctx, disp->HistoryTable); + _LIBUNWIND_ABORT("RtlUnwindEx() failed"); + case _URC_INSTALL_CONTEXT: { + // If we were called by __libunwind_seh_personality(), indicate that + // a handler was found; otherwise, it's time to initiate a collided + // unwind to the target. + if (ours && !IS_UNWINDING(ms_exc->ExceptionFlags) && ms_exc->NumberParameters > 1) + return 4 /* ExecptionExecuteHandler in mingw */; + // This should never happen in phase 1. + if (!IS_UNWINDING(ms_exc->ExceptionFlags)) + _LIBUNWIND_ABORT("Personality installed context during phase 1!"); +#ifdef __x86_64__ + exc->private_[2] = disp->TargetIp; + __unw_get_reg(&cursor, UNW_X86_64_RAX, &retval); + __unw_get_reg(&cursor, UNW_X86_64_RDX, &exc->private_[3]); +#elif defined(__arm__) + exc->private_[2] = disp->TargetPc; + __unw_get_reg(&cursor, UNW_ARM_R0, &retval); + __unw_get_reg(&cursor, UNW_ARM_R1, &exc->private_[3]); +#elif defined(__aarch64__) + exc->private_[2] = disp->TargetPc; + __unw_get_reg(&cursor, UNW_ARM64_X0, &retval); + __unw_get_reg(&cursor, UNW_ARM64_X1, &exc->private_[3]); +#endif + __unw_get_reg(&cursor, UNW_REG_IP, &target); + ms_exc->ExceptionCode = STATUS_GCC_UNWIND; +#ifdef __x86_64__ + ms_exc->ExceptionInformation[2] = disp->TargetIp; +#elif defined(__arm__) || defined(__aarch64__) + ms_exc->ExceptionInformation[2] = disp->TargetPc; +#endif + ms_exc->ExceptionInformation[3] = exc->private_[3]; + // Give NTRTL some scratch space to keep track of the collided unwind. + // Don't use the one that was passed in; we don't want to overwrite the + // context in the DISPATCHER_CONTEXT. + CONTEXT new_ctx; + RtlUnwindEx(frame, (PVOID)target, ms_exc, (PVOID)retval, &new_ctx, disp->HistoryTable); + _LIBUNWIND_ABORT("RtlUnwindEx() failed"); + } + // Anything else indicates a serious problem. + default: return ExceptionContinueExecution; + } +} + +/// Personality function returned by \c __unw_get_proc_info() in SEH contexts. +/// This is a wrapper that calls the real SEH handler function, which in +/// turn (at least, for Itanium-style frames) calls the real Itanium +/// personality function (see \c _GCC_specific_handler()). +extern "C" _Unwind_Reason_Code +__libunwind_seh_personality(int version, _Unwind_Action state, + uint64_t klass, _Unwind_Exception *exc, + struct _Unwind_Context *context) { + (void)version; + (void)klass; + EXCEPTION_RECORD ms_exc; + bool phase2 = (state & (_UA_SEARCH_PHASE|_UA_CLEANUP_PHASE)) == _UA_CLEANUP_PHASE; + ms_exc.ExceptionCode = STATUS_GCC_THROW; + ms_exc.ExceptionFlags = 0; + ms_exc.NumberParameters = 3; + ms_exc.ExceptionInformation[0] = (ULONG_PTR)exc; + ms_exc.ExceptionInformation[1] = (ULONG_PTR)context; + ms_exc.ExceptionInformation[2] = state; + DISPATCHER_CONTEXT *disp_ctx = + __unw_seh_get_disp_ctx((unw_cursor_t *)context); + EXCEPTION_DISPOSITION ms_act = disp_ctx->LanguageHandler(&ms_exc, + (PVOID)disp_ctx->EstablisherFrame, + disp_ctx->ContextRecord, + disp_ctx); + switch (ms_act) { + case ExceptionContinueSearch: return _URC_CONTINUE_UNWIND; + case 4 /*ExceptionExecuteHandler*/: + return phase2 ? _URC_INSTALL_CONTEXT : _URC_HANDLER_FOUND; + default: + return phase2 ? 
_URC_FATAL_PHASE2_ERROR : _URC_FATAL_PHASE1_ERROR; + } +} + +static _Unwind_Reason_Code +unwind_phase2_forced(unw_context_t *uc, + _Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + unw_cursor_t cursor2; + __unw_init_local(&cursor2, uc); + + // Walk each frame until we reach where search phase said to stop + while (__unw_step(&cursor2) > 0) { + + // Update info about this frame. + unw_proc_info_t frameInfo; + if (__unw_get_proc_info(&cursor2, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): __unw_step " + "failed => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(&cursor2, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): start_ip=0x%" PRIx64 + ", func=%s, lsda=0x%" PRIx64 ", personality=0x%" PRIx64, + (void *)exception_object, frameInfo.start_ip, functionName, + frameInfo.lsda, frameInfo.handler); + } + + // Call stop function at each frame. + _Unwind_Action action = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE); + _Unwind_Reason_Code stopResult = + (*stop)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(&cursor2), stop_parameter); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): stop function returned %d", + (void *)exception_object, stopResult); + if (stopResult != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): stopped by stop function", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(intptr_t)(frameInfo.handler); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): calling personality function %p", + (void *)exception_object, (void *)(uintptr_t)p); + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(&cursor2)); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " + "_URC_CONTINUE_UNWIND", + (void *)exception_object); + // Destructors called, continue unwinding + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " + "_URC_INSTALL_CONTEXT", + (void *)exception_object); + // We may get control back if landing pad calls _Unwind_Resume(). + __unw_resume(&cursor2); + break; + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned %d, " + "_URC_FATAL_PHASE2_ERROR", + (void *)exception_object, personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + } + + // Call stop function one last time and tell it we've reached the end + // of the stack. 
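+  // (Editor's note: this final call is the only one made with _UA_END_OF_STACK
+  // set, which is how the stop function learns the target frame was never
+  // reached.)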
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " + "function with _UA_END_OF_STACK", + (void *)exception_object); + _Unwind_Action lastAction = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); + (*stop)(1, lastAction, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(&cursor2), stop_parameter); + + // Clean up phase did not resume at the frame that the search phase said it + // would. + return _URC_FATAL_PHASE2_ERROR; +} + +/// Called by \c __cxa_throw(). Only returns if there is a fatal error. +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_RaiseException(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)", + (void *)exception_object); + + // Mark that this is a non-forced unwind, so _Unwind_Resume() + // can do the right thing. + memset(exception_object->private_, 0, sizeof(exception_object->private_)); + + // phase 1: the search phase + // We'll let the system do that for us. + RaiseException(STATUS_GCC_THROW, 0, 1, (ULONG_PTR *)&exception_object); + + // If we get here, either something went horribly wrong or we reached the + // top of the stack. Either way, let libc++abi call std::terminate(). + return _URC_END_OF_STACK; +} + +/// When \c _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function; the landing +/// pad code may then call \c _Unwind_Resume() to continue with the +/// unwinding. Note: the call to \c _Unwind_Resume() is from compiler +/// geneated user code. All other \c _Unwind_* routines are called +/// by the C++ runtime \c __cxa_* routines. +/// +/// Note: re-throwing an exception (as opposed to continuing the unwind) +/// is implemented by having the code call \c __cxa_rethrow() which +/// in turn calls \c _Unwind_Resume_or_Rethrow(). +_LIBUNWIND_EXPORT void +_Unwind_Resume(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)", (void *)exception_object); + + if (exception_object->private_[0] != 0) { + unw_context_t uc; + + __unw_getcontext(&uc); + unwind_phase2_forced(&uc, exception_object, + (_Unwind_Stop_Fn) exception_object->private_[0], + (void *)exception_object->private_[4]); + } else { + // Recover the parameters for the unwind from the exception object + // so we can start unwinding again. + EXCEPTION_RECORD ms_exc; + CONTEXT ms_ctx; + UNWIND_HISTORY_TABLE hist; + + memset(&ms_exc, 0, sizeof(ms_exc)); + memset(&hist, 0, sizeof(hist)); + ms_exc.ExceptionCode = STATUS_GCC_THROW; + ms_exc.ExceptionFlags = EXCEPTION_NONCONTINUABLE; + ms_exc.NumberParameters = 4; + ms_exc.ExceptionInformation[0] = (ULONG_PTR)exception_object; + ms_exc.ExceptionInformation[1] = exception_object->private_[1]; + ms_exc.ExceptionInformation[2] = exception_object->private_[2]; + ms_exc.ExceptionInformation[3] = exception_object->private_[3]; + RtlUnwindEx((PVOID)exception_object->private_[1], + (PVOID)exception_object->private_[2], &ms_exc, + exception_object, &ms_ctx, &hist); + } + + // Clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); +} + +/// Not used by C++. +/// Unwinds stack, calling "stop" function at each frame. +/// Could be used to implement \c longjmp(). 
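+/// (Editor's note, illustrative sketch only: a caller supplies a function with
+/// the _Unwind_Stop_Fn shape, which is invoked with
+/// _UA_FORCE_UNWIND | _UA_CLEANUP_PHASE at each frame and with
+/// _UA_END_OF_STACK added on the final call:
+///
+///   static _Unwind_Reason_Code my_stop(int version, _Unwind_Action actions,
+///                                      uint64_t exc_class,
+///                                      _Unwind_Exception *exc,
+///                                      struct _Unwind_Context *ctx,
+///                                      void *param) {
+///     if (actions & _UA_END_OF_STACK)
+///       abort();                          // target frame was never found
+///     if (frame_is_target(ctx, param))    // hypothetical predicate
+///       transfer_control(ctx, param);     // longjmp-style restore; no return
+///     return _URC_NO_REASON;              // keep unwinding
+///   }
+///
+/// my_stop, frame_is_target and transfer_control are made-up names, not part
+/// of this patch.)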
+_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + _LIBUNWIND_TRACE_API("_Unwind_ForcedUnwind(ex_obj=%p, stop=%p)", + (void *)exception_object, (void *)(uintptr_t)stop); + unw_context_t uc; + __unw_getcontext(&uc); + + // Mark that this is a forced unwind, so _Unwind_Resume() can do + // the right thing. + exception_object->private_[0] = (uintptr_t) stop; + exception_object->private_[4] = (uintptr_t) stop_parameter; + + // do it + return unwind_phase2_forced(&uc, exception_object, stop, stop_parameter); +} + +/// Called by personality handler during phase 2 to get LSDA for current frame. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + uintptr_t result = + (uintptr_t)__unw_seh_get_disp_ctx((unw_cursor_t *)context)->HandlerData; + _LIBUNWIND_TRACE_API( + "_Unwind_GetLanguageSpecificData(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + return result; +} + +/// Called by personality handler during phase 2 to find the start of the +/// function. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + DISPATCHER_CONTEXT *disp = __unw_seh_get_disp_ctx((unw_cursor_t *)context); + uintptr_t result = (uintptr_t)disp->FunctionEntry->BeginAddress + disp->ImageBase; + _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + return result; +} + +static int __unw_init_seh(unw_cursor_t *cursor, CONTEXT *context) { +#ifdef _LIBUNWIND_TARGET_X86_64 + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); + auto *co = reinterpret_cast(cursor); + co->setInfoBasedOnIPRegister(); + return UNW_ESUCCESS; +#elif defined(_LIBUNWIND_TARGET_ARM) + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); + auto *co = reinterpret_cast(cursor); + co->setInfoBasedOnIPRegister(); + return UNW_ESUCCESS; +#elif defined(_LIBUNWIND_TARGET_AARCH64) + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); + auto *co = reinterpret_cast(cursor); + co->setInfoBasedOnIPRegister(); + return UNW_ESUCCESS; +#else + return UNW_EINVAL; +#endif +} + +static DISPATCHER_CONTEXT *__unw_seh_get_disp_ctx(unw_cursor_t *cursor) { +#ifdef _LIBUNWIND_TARGET_X86_64 + return reinterpret_cast *>(cursor)->getDispatcherContext(); +#elif defined(_LIBUNWIND_TARGET_ARM) + return reinterpret_cast *>(cursor)->getDispatcherContext(); +#elif defined(_LIBUNWIND_TARGET_AARCH64) + return reinterpret_cast *>(cursor)->getDispatcherContext(); +#else + return nullptr; +#endif +} + +static void __unw_seh_set_disp_ctx(unw_cursor_t *cursor, + DISPATCHER_CONTEXT *disp) { +#ifdef _LIBUNWIND_TARGET_X86_64 + reinterpret_cast *>(cursor)->setDispatcherContext(disp); +#elif defined(_LIBUNWIND_TARGET_ARM) + reinterpret_cast *>(cursor)->setDispatcherContext(disp); +#elif defined(_LIBUNWIND_TARGET_AARCH64) + reinterpret_cast *>(cursor)->setDispatcherContext(disp); +#endif +} + +#endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) diff --git a/src/coreclr/src/nativeaot/libunwind/src/Unwind-sjlj.c b/src/coreclr/src/nativeaot/libunwind/src/Unwind-sjlj.c new file mode 100644 index 0000000000000..b8bb7c83bdff7 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Unwind-sjlj.c @@ -0,0 +1,516 @@ +//===--------------------------- Unwind-sjlj.c ----------------------------===// +// +// Part of the LLVM Project, under 
the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Implements setjump-longjump based C++ exceptions +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include +#include +#include + +#include "config.h" + +/// With SJLJ based exceptions, any function that has a catch clause or needs to +/// do any clean up when an exception propagates through it, needs to call +/// \c _Unwind_SjLj_Register at the start of the function and +/// \c _Unwind_SjLj_Unregister at the end. The register function is called with +/// the address of a block of memory in the function's stack frame. The runtime +/// keeps a linked list (stack) of these blocks - one per thread. The calling +/// function also sets the personality and lsda fields of the block. + +#if defined(_LIBUNWIND_BUILD_SJLJ_APIS) + +struct _Unwind_FunctionContext { + // next function in stack of handlers + struct _Unwind_FunctionContext *prev; + + // set by calling function before registering to be the landing pad + uint32_t resumeLocation; + + // set by personality handler to be parameters passed to landing pad function + uint32_t resumeParameters[4]; + + // set by calling function before registering + __personality_routine personality; // arm offset=24 + uintptr_t lsda; // arm offset=28 + + // variable length array, contains registers to restore + // 0 = r7, 1 = pc, 2 = sp + void *jbuf[]; +}; + +#if defined(_LIBUNWIND_HAS_NO_THREADS) +# define _LIBUNWIND_THREAD_LOCAL +#else +# if __STDC_VERSION__ >= 201112L +# define _LIBUNWIND_THREAD_LOCAL _Thread_local +# elif defined(_MSC_VER) +# define _LIBUNWIND_THREAD_LOCAL __declspec(thread) +# elif defined(__GNUC__) || defined(__clang__) +# define _LIBUNWIND_THREAD_LOCAL __thread +# else +# error Unable to create thread local storage +# endif +#endif + + +#if !defined(FOR_DYLD) + +#if defined(__APPLE__) +#include +#else +static _LIBUNWIND_THREAD_LOCAL struct _Unwind_FunctionContext *stack = NULL; +#endif + +static struct _Unwind_FunctionContext *__Unwind_SjLj_GetTopOfFunctionStack() { +#if defined(__APPLE__) + return _pthread_getspecific_direct(__PTK_LIBC_DYLD_Unwind_SjLj_Key); +#else + return stack; +#endif +} + +static void +__Unwind_SjLj_SetTopOfFunctionStack(struct _Unwind_FunctionContext *fc) { +#if defined(__APPLE__) + _pthread_setspecific_direct(__PTK_LIBC_DYLD_Unwind_SjLj_Key, fc); +#else + stack = fc; +#endif +} + +#endif + + +/// Called at start of each function that catches exceptions +_LIBUNWIND_EXPORT void +_Unwind_SjLj_Register(struct _Unwind_FunctionContext *fc) { + fc->prev = __Unwind_SjLj_GetTopOfFunctionStack(); + __Unwind_SjLj_SetTopOfFunctionStack(fc); +} + + +/// Called at end of each function that catches exceptions +_LIBUNWIND_EXPORT void +_Unwind_SjLj_Unregister(struct _Unwind_FunctionContext *fc) { + __Unwind_SjLj_SetTopOfFunctionStack(fc->prev); +} + + +static _Unwind_Reason_Code +unwind_phase1(struct _Unwind_Exception *exception_object) { + _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1: initial function-context=%p", + (void *)c); + + // walk each frame looking for a place to stop + for (bool handlerNotFound = true; handlerNotFound; c = c->prev) { + + // check for no more frames + if (c == NULL) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); + 
return _URC_END_OF_STACK; + } + + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1: function-context=%p", (void *)c); + // if there is a personality routine, ask it if it will want to stop at this + // frame + if (c->personality != NULL) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): calling " + "personality function %p", + (void *)exception_object, + (void *)c->personality); + _Unwind_Reason_Code personalityResult = (*c->personality)( + 1, _UA_SEARCH_PHASE, exception_object->exception_class, + exception_object, (struct _Unwind_Context *)c); + switch (personalityResult) { + case _URC_HANDLER_FOUND: + // found a catch clause or locals that need destructing in this frame + // stop search and remember function context + handlerNotFound = false; + exception_object->private_2 = (uintptr_t) c; + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): " + "_URC_HANDLER_FOUND", + (void *)exception_object); + return _URC_NO_REASON; + + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): " + "_URC_CONTINUE_UNWIND", + (void *)exception_object); + // continue unwinding + break; + + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE1_ERROR; + } + } + } + return _URC_NO_REASON; +} + + +static _Unwind_Reason_Code +unwind_phase2(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", + (void *)exception_object); + + // walk each frame until we reach where search phase said to stop + _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); + while (true) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2s(ex_ojb=%p): context=%p", + (void *)exception_object, (void *)c); + + // check for no more frames + if (c == NULL) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_END_OF_STACK; + } + + // if there is a personality routine, tell it we are unwinding + if (c->personality != NULL) { + _Unwind_Action action = _UA_CLEANUP_PHASE; + if ((uintptr_t) c == exception_object->private_2) + action = (_Unwind_Action)( + _UA_CLEANUP_PHASE | + _UA_HANDLER_FRAME); // tell personality this was the frame it marked + // in phase 1 + _Unwind_Reason_Code personalityResult = + (*c->personality)(1, action, exception_object->exception_class, + exception_object, (struct _Unwind_Context *)c); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + // continue unwinding + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND", + (void *)exception_object); + if ((uintptr_t) c == exception_object->private_2) { + // phase 1 said we would stop at this frame, but we did not... 
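          // (private_2 was set in unwind_phase1() when this frame's personality
          //  routine answered _URC_HANDLER_FOUND, and _UA_HANDLER_FRAME was passed
          //  above.  A personality routine that claimed the frame in phase 1 must
          //  install its handler in phase 2; answering _URC_CONTINUE_UNWIND at the
          //  marked frame leaves the unwinder with no consistent state to resume
          //  from, so the only safe response is to abort.)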
+ _LIBUNWIND_ABORT("during phase1 personality function said it would " + "stop here, but now if phase2 it did not stop here"); + } + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): " + "_URC_INSTALL_CONTEXT, will resume at " + "landing pad %p", + (void *)exception_object, c->jbuf[1]); + // personality routine says to transfer control to landing pad + // we may get control back if landing pad calls _Unwind_Resume() + __Unwind_SjLj_SetTopOfFunctionStack(c); + __builtin_longjmp(c->jbuf, 1); + // __unw_resume() only returns if there was an error + return _URC_FATAL_PHASE2_ERROR; + default: + // something went wrong + _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", + personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + c = c->prev; + } + + // clean up phase did not resume at the frame that the search phase said it + // would + return _URC_FATAL_PHASE2_ERROR; +} + + +static _Unwind_Reason_Code +unwind_phase2_forced(struct _Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + // walk each frame until we reach where search phase said to stop + _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); + while (true) { + + // get next frame (skip over first which is _Unwind_RaiseException) + if (c == NULL) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_END_OF_STACK; + } + + // call stop function at each frame + _Unwind_Action action = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE); + _Unwind_Reason_Code stopResult = + (*stop)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)c, stop_parameter); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "stop function returned %d", + (void *)exception_object, stopResult); + if (stopResult != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "stopped by stop function", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // if there is a personality routine, tell it we are unwinding + if (c->personality != NULL) { + __personality_routine p = (__personality_routine) c->personality; + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "calling personality function %p", + (void *)exception_object, (void *)p); + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)c); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned _URC_CONTINUE_UNWIND", + (void *)exception_object); + // destructors called, continue unwinding + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned _URC_INSTALL_CONTEXT", + (void *)exception_object); + // we may get control back if landing pad calls _Unwind_Resume() + __Unwind_SjLj_SetTopOfFunctionStack(c); + __builtin_longjmp(c->jbuf, 1); + break; + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned %d, " + "_URC_FATAL_PHASE2_ERROR", + (void *)exception_object, personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + c = c->prev; + } + + // call stop function one last time and tell it we've reached the end of the + // stack + 
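  // (_UA_END_OF_STACK signals that no frame accepted the forced unwind.  A stop
  //  function driving, for example, thread cancellation typically does not return
  //  from this final call; if it does return, the forced unwind has failed and
  //  _URC_FATAL_PHASE2_ERROR is reported below.)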
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " + "function with _UA_END_OF_STACK", + (void *)exception_object); + _Unwind_Action lastAction = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); + (*stop)(1, lastAction, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)c, stop_parameter); + + // clean up phase did not resume at the frame that the search phase said it + // would + return _URC_FATAL_PHASE2_ERROR; +} + + +/// Called by __cxa_throw. Only returns if there is a fatal error +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_SjLj_RaiseException(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_SjLj_RaiseException(ex_obj=%p)", + (void *)exception_object); + + // mark that this is a non-forced unwind, so _Unwind_Resume() can do the right + // thing + exception_object->private_1 = 0; + exception_object->private_2 = 0; + + // phase 1: the search phase + _Unwind_Reason_Code phase1 = unwind_phase1(exception_object); + if (phase1 != _URC_NO_REASON) + return phase1; + + // phase 2: the clean up phase + return unwind_phase2(exception_object); +} + + + +/// When _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function, the landing +/// pad code may then call _Unwind_Resume() to continue with the +/// unwinding. Note: the call to _Unwind_Resume() is from compiler +/// geneated user code. All other _Unwind_* routines are called +/// by the C++ runtime __cxa_* routines. +/// +/// Re-throwing an exception is implemented by having the code call +/// __cxa_rethrow() which in turn calls _Unwind_Resume_or_Rethrow() +_LIBUNWIND_EXPORT void +_Unwind_SjLj_Resume(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_SjLj_Resume(ex_obj=%p)", + (void *)exception_object); + + if (exception_object->private_1 != 0) + unwind_phase2_forced(exception_object, + (_Unwind_Stop_Fn) exception_object->private_1, + (void *)exception_object->private_2); + else + unwind_phase2(exception_object); + + // clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_SjLj_Resume() can't return"); +} + + +/// Called by __cxa_rethrow(). +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("__Unwind_SjLj_Resume_or_Rethrow(ex_obj=%p), " + "private_1=%" PRIuPTR, + (void *)exception_object, exception_object->private_1); + // If this is non-forced and a stopping place was found, then this is a + // re-throw. + // Call _Unwind_RaiseException() as if this was a new exception. + if (exception_object->private_1 == 0) { + return _Unwind_SjLj_RaiseException(exception_object); + // should return if there is no catch clause, so that __cxa_rethrow can call + // std::terminate() + } + + // Call through to _Unwind_Resume() which distiguishes between forced and + // regular exceptions. + _Unwind_SjLj_Resume(exception_object); + _LIBUNWIND_ABORT("__Unwind_SjLj_Resume_or_Rethrow() called " + "_Unwind_SjLj_Resume() which unexpectedly returned"); +} + + +/// Called by personality handler during phase 2 to get LSDA for current frame. 
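// Illustrative sketch only, not part of the upstream libunwind sources.  It
// shows how a language runtime's throw path is expected to drive the SJLJ entry
// points above; the helper name and the abort() fallback are assumptions, the
// real drivers being the C++ ABI library's __cxa_throw()/__cxa_rethrow().
static void example_sjlj_throw(struct _Unwind_Exception *exception_object) {
  // _Unwind_SjLj_RaiseException() only returns on a fatal error (for example
  // _URC_END_OF_STACK when no frame offered a handler), so the runtime has
  // nothing left to do but terminate.
  _Unwind_SjLj_RaiseException(exception_object);
  abort();
}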
+_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + _LIBUNWIND_TRACE_API("_Unwind_GetLanguageSpecificData(context=%p) " + "=> 0x%" PRIuPTR, + (void *)context, ufc->lsda); + return ufc->lsda; +} + + +/// Called by personality handler during phase 2 to get register values. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, + int index) { + _LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d)", (void *)context, + index); + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + return ufc->resumeParameters[index]; +} + + +/// Called by personality handler during phase 2 to alter register values. +_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t new_value) { + _LIBUNWIND_TRACE_API("_Unwind_SetGR(context=%p, reg=%d, value=0x%" PRIuPTR + ")", + (void *)context, index, new_value); + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + ufc->resumeParameters[index] = new_value; +} + + +/// Called by personality handler during phase 2 to get instruction pointer. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIu32, + (void *)context, ufc->resumeLocation + 1); + return ufc->resumeLocation + 1; +} + + +/// Called by personality handler during phase 2 to get instruction pointer. +/// ipBefore is a boolean that says if IP is already adjusted to be the call +/// site address. Normally IP is the return address. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, + int *ipBefore) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + *ipBefore = 0; + _LIBUNWIND_TRACE_API("_Unwind_GetIPInfo(context=%p, %p) => 0x%" PRIu32, + (void *)context, (void *)ipBefore, + ufc->resumeLocation + 1); + return ufc->resumeLocation + 1; +} + + +/// Called by personality handler during phase 2 to alter instruction pointer. +_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context, + uintptr_t new_value) { + _LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%" PRIuPTR ")", + (void *)context, new_value); + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + ufc->resumeLocation = new_value - 1; +} + + +/// Called by personality handler during phase 2 to find the start of the +/// function. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + // Not supported or needed for sjlj based unwinding + (void)context; + _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p)", (void *)context); + return 0; +} + + +/// Called by personality handler during phase 2 if a foreign exception +/// is caught. +_LIBUNWIND_EXPORT void +_Unwind_DeleteException(struct _Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)", + (void *)exception_object); + if (exception_object->exception_cleanup != NULL) + (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, + exception_object); +} + + + +/// Called by personality handler during phase 2 to get base address for data +/// relative encodings. 
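// Illustrative sketch only, not part of the upstream libunwind sources.  Under
// SJLJ the "registers" and "IP" manipulated above are virtual: _Unwind_SetGR()
// fills resumeParameters[] and _Unwind_SetIP() records a landing-pad index in
// resumeLocation, both of which the longjmp in unwind_phase2() hands to the
// landing pad.  A personality routine that has located its handler would
// typically end phase 2 like this; using slots 0 and 1 for the exception object
// and the handler switch value follows the Itanium C++ ABI convention and is an
// assumption here.
static _Unwind_Reason_Code
example_install_context(struct _Unwind_Exception *exception_object,
                        struct _Unwind_Context *context,
                        uintptr_t landing_pad, uintptr_t switch_value) {
  _Unwind_SetGR(context, 0, (uintptr_t)exception_object);
  _Unwind_SetGR(context, 1, switch_value);
  _Unwind_SetIP(context, landing_pad);
  return _URC_INSTALL_CONTEXT;
}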
+_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetDataRelBase(struct _Unwind_Context *context) { + // Not supported or needed for sjlj based unwinding + (void)context; + _LIBUNWIND_TRACE_API("_Unwind_GetDataRelBase(context=%p)", (void *)context); + _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented"); +} + + +/// Called by personality handler during phase 2 to get base address for text +/// relative encodings. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetTextRelBase(struct _Unwind_Context *context) { + // Not supported or needed for sjlj based unwinding + (void)context; + _LIBUNWIND_TRACE_API("_Unwind_GetTextRelBase(context=%p)", (void *)context); + _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented"); +} + + +/// Called by personality handler to get "Call Frame Area" for current frame. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) { + _LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p)", (void *)context); + if (context != NULL) { + _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; + // Setjmp/longjmp based exceptions don't have a true CFA. + // Instead, the SP in the jmpbuf is the closest approximation. + return (uintptr_t) ufc->jbuf[2]; + } + return 0; +} + +#endif // defined(_LIBUNWIND_BUILD_SJLJ_APIS) diff --git a/src/coreclr/src/nativeaot/libunwind/src/UnwindCursor.hpp b/src/coreclr/src/nativeaot/libunwind/src/UnwindCursor.hpp new file mode 100644 index 0000000000000..ae5cbe7479e6f --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/UnwindCursor.hpp @@ -0,0 +1,2026 @@ +//===------------------------- UnwindCursor.hpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// C++ interface to lower levels of libunwind +//===----------------------------------------------------------------------===// + +#ifndef __UNWINDCURSOR_HPP__ +#define __UNWINDCURSOR_HPP__ + +#include +#include +#include +#include + +#ifdef _WIN32 + #include + #include +#endif +#ifdef __APPLE__ + #include +#endif + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) +// Provide a definition for the DISPATCHER_CONTEXT struct for old (Win7 and +// earlier) SDKs. +// MinGW-w64 has always provided this struct. + #if defined(_WIN32) && defined(_LIBUNWIND_TARGET_X86_64) && \ + !defined(__MINGW32__) && VER_PRODUCTBUILD < 8000 +struct _DISPATCHER_CONTEXT { + ULONG64 ControlPc; + ULONG64 ImageBase; + PRUNTIME_FUNCTION FunctionEntry; + ULONG64 EstablisherFrame; + ULONG64 TargetIp; + PCONTEXT ContextRecord; + PEXCEPTION_ROUTINE LanguageHandler; + PVOID HandlerData; + PUNWIND_HISTORY_TABLE HistoryTable; + ULONG ScopeIndex; + ULONG Fill0; +}; + #endif + +struct UNWIND_INFO { + uint8_t Version : 3; + uint8_t Flags : 5; + uint8_t SizeOfProlog; + uint8_t CountOfCodes; + uint8_t FrameRegister : 4; + uint8_t FrameOffset : 4; + uint16_t UnwindCodes[2]; +}; + +extern "C" _Unwind_Reason_Code __libunwind_seh_personality( + int, _Unwind_Action, uint64_t, _Unwind_Exception *, + struct _Unwind_Context *); + +#endif + +#include "config.h" + +#include "AddressSpace.hpp" +#include "CompactUnwinder.hpp" +#include "config.h" +#include "DwarfInstructions.hpp" +#include "EHHeaderParser.hpp" +#include "libunwind.h" +#include "Registers.hpp" +#include "RWMutex.hpp" +#include "Unwind-EHABI.h" + +namespace libunwind { + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +/// Cache of recently found FDEs. 
+template +class _LIBUNWIND_HIDDEN DwarfFDECache { + typedef typename A::pint_t pint_t; +public: + static pint_t findFDE(pint_t mh, pint_t pc); + static void add(pint_t mh, pint_t ip_start, pint_t ip_end, pint_t fde); + static void removeAllIn(pint_t mh); + static void iterateCacheEntries(void (*func)(unw_word_t ip_start, + unw_word_t ip_end, + unw_word_t fde, unw_word_t mh)); + +private: + + struct entry { + pint_t mh; + pint_t ip_start; + pint_t ip_end; + pint_t fde; + }; + + // These fields are all static to avoid needing an initializer. + // There is only one instance of this class per process. + static RWMutex _lock; +#ifdef __APPLE__ + static void dyldUnloadHook(const struct mach_header *mh, intptr_t slide); + static bool _registeredForDyldUnloads; +#endif + static entry *_buffer; + static entry *_bufferUsed; + static entry *_bufferEnd; + static entry _initialBuffer[64]; +}; + +template +typename DwarfFDECache::entry * +DwarfFDECache::_buffer = _initialBuffer; + +template +typename DwarfFDECache::entry * +DwarfFDECache::_bufferUsed = _initialBuffer; + +template +typename DwarfFDECache::entry * +DwarfFDECache::_bufferEnd = &_initialBuffer[64]; + +template +typename DwarfFDECache::entry DwarfFDECache::_initialBuffer[64]; + +template +RWMutex DwarfFDECache::_lock; + +#ifdef __APPLE__ +template +bool DwarfFDECache::_registeredForDyldUnloads = false; +#endif + +template +typename A::pint_t DwarfFDECache::findFDE(pint_t mh, pint_t pc) { + pint_t result = 0; + _LIBUNWIND_LOG_IF_FALSE(_lock.lock_shared()); + for (entry *p = _buffer; p < _bufferUsed; ++p) { + if ((mh == p->mh) || (mh == 0)) { + if ((p->ip_start <= pc) && (pc < p->ip_end)) { + result = p->fde; + break; + } + } + } + _LIBUNWIND_LOG_IF_FALSE(_lock.unlock_shared()); + return result; +} + +template +void DwarfFDECache::add(pint_t mh, pint_t ip_start, pint_t ip_end, + pint_t fde) { +#if !defined(_LIBUNWIND_NO_HEAP) + _LIBUNWIND_LOG_IF_FALSE(_lock.lock()); + if (_bufferUsed >= _bufferEnd) { + size_t oldSize = (size_t)(_bufferEnd - _buffer); + size_t newSize = oldSize * 4; + // Can't use operator new (we are below it). 
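  // (Growth strategy, visible below: the cache starts in the statically
  //  allocated _initialBuffer, grows by a factor of four using malloc()/memcpy()
  //  since the entries are plain PODs, and only ever frees buffers it allocated
  //  itself; the static initial buffer is never passed to free().)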
+ entry *newBuffer = (entry *)malloc(newSize * sizeof(entry)); + memcpy(newBuffer, _buffer, oldSize * sizeof(entry)); + if (_buffer != _initialBuffer) + free(_buffer); + _buffer = newBuffer; + _bufferUsed = &newBuffer[oldSize]; + _bufferEnd = &newBuffer[newSize]; + } + _bufferUsed->mh = mh; + _bufferUsed->ip_start = ip_start; + _bufferUsed->ip_end = ip_end; + _bufferUsed->fde = fde; + ++_bufferUsed; +#ifdef __APPLE__ + if (!_registeredForDyldUnloads) { + _dyld_register_func_for_remove_image(&dyldUnloadHook); + _registeredForDyldUnloads = true; + } +#endif + _LIBUNWIND_LOG_IF_FALSE(_lock.unlock()); +#endif +} + +template +void DwarfFDECache::removeAllIn(pint_t mh) { + _LIBUNWIND_LOG_IF_FALSE(_lock.lock()); + entry *d = _buffer; + for (const entry *s = _buffer; s < _bufferUsed; ++s) { + if (s->mh != mh) { + if (d != s) + *d = *s; + ++d; + } + } + _bufferUsed = d; + _LIBUNWIND_LOG_IF_FALSE(_lock.unlock()); +} + +#ifdef __APPLE__ +template +void DwarfFDECache::dyldUnloadHook(const struct mach_header *mh, intptr_t ) { + removeAllIn((pint_t) mh); +} +#endif + +template +void DwarfFDECache::iterateCacheEntries(void (*func)( + unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) { + _LIBUNWIND_LOG_IF_FALSE(_lock.lock()); + for (entry *p = _buffer; p < _bufferUsed; ++p) { + (*func)(p->ip_start, p->ip_end, p->fde, p->mh); + } + _LIBUNWIND_LOG_IF_FALSE(_lock.unlock()); +} +#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + + +#define arrayoffsetof(type, index, field) ((size_t)(&((type *)0)[index].field)) + +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) +template class UnwindSectionHeader { +public: + UnwindSectionHeader(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t version() const { + return _addressSpace.get32(_addr + + offsetof(unwind_info_section_header, version)); + } + uint32_t commonEncodingsArraySectionOffset() const { + return _addressSpace.get32(_addr + + offsetof(unwind_info_section_header, + commonEncodingsArraySectionOffset)); + } + uint32_t commonEncodingsArrayCount() const { + return _addressSpace.get32(_addr + offsetof(unwind_info_section_header, + commonEncodingsArrayCount)); + } + uint32_t personalityArraySectionOffset() const { + return _addressSpace.get32(_addr + offsetof(unwind_info_section_header, + personalityArraySectionOffset)); + } + uint32_t personalityArrayCount() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_section_header, personalityArrayCount)); + } + uint32_t indexSectionOffset() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_section_header, indexSectionOffset)); + } + uint32_t indexCount() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_section_header, indexCount)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionIndexArray { +public: + UnwindSectionIndexArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_index_entry, index, + functionOffset)); + } + uint32_t secondLevelPagesSectionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_index_entry, index, + secondLevelPagesSectionOffset)); + } + uint32_t lsdaIndexArraySectionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + 
arrayoffsetof(unwind_info_section_header_index_entry, index, + lsdaIndexArraySectionOffset)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionRegularPageHeader { +public: + UnwindSectionRegularPageHeader(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t kind() const { + return _addressSpace.get32( + _addr + offsetof(unwind_info_regular_second_level_page_header, kind)); + } + uint16_t entryPageOffset() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_regular_second_level_page_header, + entryPageOffset)); + } + uint16_t entryCount() const { + return _addressSpace.get16( + _addr + + offsetof(unwind_info_regular_second_level_page_header, entryCount)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionRegularArray { +public: + UnwindSectionRegularArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_regular_second_level_entry, index, + functionOffset)); + } + uint32_t encoding(uint32_t index) const { + return _addressSpace.get32( + _addr + + arrayoffsetof(unwind_info_regular_second_level_entry, index, encoding)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionCompressedPageHeader { +public: + UnwindSectionCompressedPageHeader(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t kind() const { + return _addressSpace.get32( + _addr + + offsetof(unwind_info_compressed_second_level_page_header, kind)); + } + uint16_t entryPageOffset() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_compressed_second_level_page_header, + entryPageOffset)); + } + uint16_t entryCount() const { + return _addressSpace.get16( + _addr + + offsetof(unwind_info_compressed_second_level_page_header, entryCount)); + } + uint16_t encodingsPageOffset() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_compressed_second_level_page_header, + encodingsPageOffset)); + } + uint16_t encodingsCount() const { + return _addressSpace.get16( + _addr + offsetof(unwind_info_compressed_second_level_page_header, + encodingsCount)); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionCompressedArray { +public: + UnwindSectionCompressedArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET( + _addressSpace.get32(_addr + index * sizeof(uint32_t))); + } + uint16_t encodingIndex(uint32_t index) const { + return UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX( + _addressSpace.get32(_addr + index * sizeof(uint32_t))); + } + +private: + A &_addressSpace; + typename A::pint_t _addr; +}; + +template class UnwindSectionLsdaArray { +public: + UnwindSectionLsdaArray(A &addressSpace, typename A::pint_t addr) + : _addressSpace(addressSpace), _addr(addr) {} + + uint32_t functionOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry, + index, functionOffset)); + } + uint32_t lsdaOffset(uint32_t index) const { + return _addressSpace.get32( + _addr + arrayoffsetof(unwind_info_section_header_lsda_index_entry, + index, lsdaOffset)); + } + 
+private: + A &_addressSpace; + typename A::pint_t _addr; +}; +#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + +class _LIBUNWIND_HIDDEN AbstractUnwindCursor { +public: + // NOTE: provide a class specific placement deallocation function (S5.3.4 p20) + // This avoids an unnecessary dependency to libc++abi. + void operator delete(void *, size_t) {} + + virtual ~AbstractUnwindCursor() {} + virtual bool validReg(int) { _LIBUNWIND_ABORT("validReg not implemented"); } + virtual unw_word_t getReg(int) { _LIBUNWIND_ABORT("getReg not implemented"); } + virtual void setReg(int, unw_word_t, unw_word_t) { + _LIBUNWIND_ABORT("setReg not implemented"); + } + virtual unw_word_t getRegLocation(int) { + _LIBUNWIND_ABORT("getRegLocation not implemented"); + } + virtual bool validFloatReg(int) { + _LIBUNWIND_ABORT("validFloatReg not implemented"); + } + virtual unw_fpreg_t getFloatReg(int) { + _LIBUNWIND_ABORT("getFloatReg not implemented"); + } + virtual void setFloatReg(int, unw_fpreg_t) { + _LIBUNWIND_ABORT("setFloatReg not implemented"); + } + virtual int step() { _LIBUNWIND_ABORT("step not implemented"); } + virtual void getInfo(unw_proc_info_t *) { + _LIBUNWIND_ABORT("getInfo not implemented"); + } + virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); } + virtual bool isSignalFrame() { + _LIBUNWIND_ABORT("isSignalFrame not implemented"); + } + virtual bool getFunctionName(char *, size_t, unw_word_t *) { + _LIBUNWIND_ABORT("getFunctionName not implemented"); + } + virtual void setInfoBasedOnIPRegister(bool = false) { + _LIBUNWIND_ABORT("setInfoBasedOnIPRegister not implemented"); + } + virtual const char *getRegisterName(int) { + _LIBUNWIND_ABORT("getRegisterName not implemented"); + } +#ifdef __arm__ + virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); } +#endif +}; + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) + +/// \c UnwindCursor contains all state (including all register values) during +/// an unwind. This is normally stack-allocated inside a unw_cursor_t. +template +class UnwindCursor : public AbstractUnwindCursor { + typedef typename A::pint_t pint_t; +public: + UnwindCursor(unw_context_t *context, A &as); + UnwindCursor(CONTEXT *context, A &as); + UnwindCursor(A &as, void *threadArg); + virtual ~UnwindCursor() {} + virtual bool validReg(int); + virtual unw_word_t getReg(int); + virtual void setReg(int, unw_word_t); + virtual bool validFloatReg(int); + virtual unw_fpreg_t getFloatReg(int); + virtual void setFloatReg(int, unw_fpreg_t); + virtual int step(); + virtual void getInfo(unw_proc_info_t *); + virtual void jumpto(); + virtual bool isSignalFrame(); + virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); + virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); + virtual const char *getRegisterName(int num); +#ifdef __arm__ + virtual void saveVFPAsX(); +#endif + + DISPATCHER_CONTEXT *getDispatcherContext() { return &_dispContext; } + void setDispatcherContext(DISPATCHER_CONTEXT *disp) { _dispContext = *disp; } + + // libunwind does not and should not depend on C++ library which means that we + // need our own defition of inline placement new. 
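  // (The class-scope placement operator new below is needed because the global
  //  placement form is declared in <new>, part of the C++ standard library that
  //  libunwind deliberately avoids, mirroring the class-specific placement
  //  delete in AbstractUnwindCursor above.)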
+ static void *operator new(size_t, UnwindCursor *p) { return p; } + +private: + + pint_t getLastPC() const { return _dispContext.ControlPc; } + void setLastPC(pint_t pc) { _dispContext.ControlPc = pc; } + RUNTIME_FUNCTION *lookUpSEHUnwindInfo(pint_t pc, pint_t *base) { + _dispContext.FunctionEntry = RtlLookupFunctionEntry(pc, + &_dispContext.ImageBase, + _dispContext.HistoryTable); + *base = _dispContext.ImageBase; + return _dispContext.FunctionEntry; + } + bool getInfoFromSEH(pint_t pc); + int stepWithSEHData() { + _dispContext.LanguageHandler = RtlVirtualUnwind(UNW_FLAG_UHANDLER, + _dispContext.ImageBase, + _dispContext.ControlPc, + _dispContext.FunctionEntry, + _dispContext.ContextRecord, + &_dispContext.HandlerData, + &_dispContext.EstablisherFrame, + NULL); + // Update some fields of the unwind info now, since we have them. + _info.lsda = reinterpret_cast(_dispContext.HandlerData); + if (_dispContext.LanguageHandler) { + _info.handler = reinterpret_cast(__libunwind_seh_personality); + } else + _info.handler = 0; + return UNW_STEP_SUCCESS; + } + + A &_addressSpace; + unw_proc_info_t _info; + DISPATCHER_CONTEXT _dispContext; + CONTEXT _msContext; + UNWIND_HISTORY_TABLE _histTable; + bool _unwindInfoMissing; +}; + + +template +UnwindCursor::UnwindCursor(unw_context_t *context, A &as) + : _addressSpace(as), _unwindInfoMissing(false) { + static_assert((check_fit, unw_cursor_t>::does_fit), + "UnwindCursor<> does not fit in unw_cursor_t"); + memset(&_info, 0, sizeof(_info)); + memset(&_histTable, 0, sizeof(_histTable)); + _dispContext.ContextRecord = &_msContext; + _dispContext.HistoryTable = &_histTable; + // Initialize MS context from ours. + R r(context); + _msContext.ContextFlags = CONTEXT_CONTROL|CONTEXT_INTEGER|CONTEXT_FLOATING_POINT; +#if defined(_LIBUNWIND_TARGET_X86_64) + _msContext.Rax = r.getRegister(UNW_X86_64_RAX); + _msContext.Rcx = r.getRegister(UNW_X86_64_RCX); + _msContext.Rdx = r.getRegister(UNW_X86_64_RDX); + _msContext.Rbx = r.getRegister(UNW_X86_64_RBX); + _msContext.Rsp = r.getRegister(UNW_X86_64_RSP); + _msContext.Rbp = r.getRegister(UNW_X86_64_RBP); + _msContext.Rsi = r.getRegister(UNW_X86_64_RSI); + _msContext.Rdi = r.getRegister(UNW_X86_64_RDI); + _msContext.R8 = r.getRegister(UNW_X86_64_R8); + _msContext.R9 = r.getRegister(UNW_X86_64_R9); + _msContext.R10 = r.getRegister(UNW_X86_64_R10); + _msContext.R11 = r.getRegister(UNW_X86_64_R11); + _msContext.R12 = r.getRegister(UNW_X86_64_R12); + _msContext.R13 = r.getRegister(UNW_X86_64_R13); + _msContext.R14 = r.getRegister(UNW_X86_64_R14); + _msContext.R15 = r.getRegister(UNW_X86_64_R15); + _msContext.Rip = r.getRegister(UNW_REG_IP); + union { + v128 v; + M128A m; + } t; + t.v = r.getVectorRegister(UNW_X86_64_XMM0); + _msContext.Xmm0 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM1); + _msContext.Xmm1 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM2); + _msContext.Xmm2 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM3); + _msContext.Xmm3 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM4); + _msContext.Xmm4 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM5); + _msContext.Xmm5 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM6); + _msContext.Xmm6 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM7); + _msContext.Xmm7 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM8); + _msContext.Xmm8 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM9); + _msContext.Xmm9 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM10); + _msContext.Xmm10 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM11); + _msContext.Xmm11 = t.m; + t.v = 
r.getVectorRegister(UNW_X86_64_XMM12); + _msContext.Xmm12 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM13); + _msContext.Xmm13 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM14); + _msContext.Xmm14 = t.m; + t.v = r.getVectorRegister(UNW_X86_64_XMM15); + _msContext.Xmm15 = t.m; +#elif defined(_LIBUNWIND_TARGET_ARM) + _msContext.R0 = r.getRegister(UNW_ARM_R0); + _msContext.R1 = r.getRegister(UNW_ARM_R1); + _msContext.R2 = r.getRegister(UNW_ARM_R2); + _msContext.R3 = r.getRegister(UNW_ARM_R3); + _msContext.R4 = r.getRegister(UNW_ARM_R4); + _msContext.R5 = r.getRegister(UNW_ARM_R5); + _msContext.R6 = r.getRegister(UNW_ARM_R6); + _msContext.R7 = r.getRegister(UNW_ARM_R7); + _msContext.R8 = r.getRegister(UNW_ARM_R8); + _msContext.R9 = r.getRegister(UNW_ARM_R9); + _msContext.R10 = r.getRegister(UNW_ARM_R10); + _msContext.R11 = r.getRegister(UNW_ARM_R11); + _msContext.R12 = r.getRegister(UNW_ARM_R12); + _msContext.Sp = r.getRegister(UNW_ARM_SP); + _msContext.Lr = r.getRegister(UNW_ARM_LR); + _msContext.Pc = r.getRegister(UNW_ARM_IP); + for (int i = UNW_ARM_D0; i <= UNW_ARM_D31; ++i) { + union { + uint64_t w; + double d; + } d; + d.d = r.getFloatRegister(i); + _msContext.D[i - UNW_ARM_D0] = d.w; + } +#elif defined(_LIBUNWIND_TARGET_AARCH64) + for (int i = UNW_ARM64_X0; i <= UNW_ARM64_X30; ++i) + _msContext.X[i - UNW_ARM64_X0] = r.getRegister(i); + _msContext.Sp = r.getRegister(UNW_REG_SP); + _msContext.Pc = r.getRegister(UNW_REG_IP); + for (int i = UNW_ARM64_D0; i <= UNW_ARM64_D31; ++i) + _msContext.V[i - UNW_ARM64_D0].D[0] = r.getFloatRegister(i); +#endif +} + +template +UnwindCursor::UnwindCursor(CONTEXT *context, A &as) + : _addressSpace(as), _unwindInfoMissing(false) { + static_assert((check_fit, unw_cursor_t>::does_fit), + "UnwindCursor<> does not fit in unw_cursor_t"); + memset(&_info, 0, sizeof(_info)); + memset(&_histTable, 0, sizeof(_histTable)); + _dispContext.ContextRecord = &_msContext; + _dispContext.HistoryTable = &_histTable; + _msContext = *context; +} + + +template +bool UnwindCursor::validReg(int regNum) { + if (regNum == UNW_REG_IP || regNum == UNW_REG_SP) return true; +#if defined(_LIBUNWIND_TARGET_X86_64) + if (regNum >= UNW_X86_64_RAX && regNum <= UNW_X86_64_R15) return true; +#elif defined(_LIBUNWIND_TARGET_ARM) + if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R15) return true; +#elif defined(_LIBUNWIND_TARGET_AARCH64) + if (regNum >= UNW_ARM64_X0 && regNum <= UNW_ARM64_X30) return true; +#endif + return false; +} + +template +unw_word_t UnwindCursor::getReg(int regNum) { + switch (regNum) { +#if defined(_LIBUNWIND_TARGET_X86_64) + case UNW_REG_IP: return _msContext.Rip; + case UNW_X86_64_RAX: return _msContext.Rax; + case UNW_X86_64_RDX: return _msContext.Rdx; + case UNW_X86_64_RCX: return _msContext.Rcx; + case UNW_X86_64_RBX: return _msContext.Rbx; + case UNW_REG_SP: + case UNW_X86_64_RSP: return _msContext.Rsp; + case UNW_X86_64_RBP: return _msContext.Rbp; + case UNW_X86_64_RSI: return _msContext.Rsi; + case UNW_X86_64_RDI: return _msContext.Rdi; + case UNW_X86_64_R8: return _msContext.R8; + case UNW_X86_64_R9: return _msContext.R9; + case UNW_X86_64_R10: return _msContext.R10; + case UNW_X86_64_R11: return _msContext.R11; + case UNW_X86_64_R12: return _msContext.R12; + case UNW_X86_64_R13: return _msContext.R13; + case UNW_X86_64_R14: return _msContext.R14; + case UNW_X86_64_R15: return _msContext.R15; +#elif defined(_LIBUNWIND_TARGET_ARM) + case UNW_ARM_R0: return _msContext.R0; + case UNW_ARM_R1: return _msContext.R1; + case UNW_ARM_R2: return _msContext.R2; + 
case UNW_ARM_R3: return _msContext.R3; + case UNW_ARM_R4: return _msContext.R4; + case UNW_ARM_R5: return _msContext.R5; + case UNW_ARM_R6: return _msContext.R6; + case UNW_ARM_R7: return _msContext.R7; + case UNW_ARM_R8: return _msContext.R8; + case UNW_ARM_R9: return _msContext.R9; + case UNW_ARM_R10: return _msContext.R10; + case UNW_ARM_R11: return _msContext.R11; + case UNW_ARM_R12: return _msContext.R12; + case UNW_REG_SP: + case UNW_ARM_SP: return _msContext.Sp; + case UNW_ARM_LR: return _msContext.Lr; + case UNW_REG_IP: + case UNW_ARM_IP: return _msContext.Pc; +#elif defined(_LIBUNWIND_TARGET_AARCH64) + case UNW_REG_SP: return _msContext.Sp; + case UNW_REG_IP: return _msContext.Pc; + default: return _msContext.X[regNum - UNW_ARM64_X0]; +#endif + } + _LIBUNWIND_ABORT("unsupported register"); +} + +template +void UnwindCursor::setReg(int regNum, unw_word_t value) { + switch (regNum) { +#if defined(_LIBUNWIND_TARGET_X86_64) + case UNW_REG_IP: _msContext.Rip = value; break; + case UNW_X86_64_RAX: _msContext.Rax = value; break; + case UNW_X86_64_RDX: _msContext.Rdx = value; break; + case UNW_X86_64_RCX: _msContext.Rcx = value; break; + case UNW_X86_64_RBX: _msContext.Rbx = value; break; + case UNW_REG_SP: + case UNW_X86_64_RSP: _msContext.Rsp = value; break; + case UNW_X86_64_RBP: _msContext.Rbp = value; break; + case UNW_X86_64_RSI: _msContext.Rsi = value; break; + case UNW_X86_64_RDI: _msContext.Rdi = value; break; + case UNW_X86_64_R8: _msContext.R8 = value; break; + case UNW_X86_64_R9: _msContext.R9 = value; break; + case UNW_X86_64_R10: _msContext.R10 = value; break; + case UNW_X86_64_R11: _msContext.R11 = value; break; + case UNW_X86_64_R12: _msContext.R12 = value; break; + case UNW_X86_64_R13: _msContext.R13 = value; break; + case UNW_X86_64_R14: _msContext.R14 = value; break; + case UNW_X86_64_R15: _msContext.R15 = value; break; +#elif defined(_LIBUNWIND_TARGET_ARM) + case UNW_ARM_R0: _msContext.R0 = value; break; + case UNW_ARM_R1: _msContext.R1 = value; break; + case UNW_ARM_R2: _msContext.R2 = value; break; + case UNW_ARM_R3: _msContext.R3 = value; break; + case UNW_ARM_R4: _msContext.R4 = value; break; + case UNW_ARM_R5: _msContext.R5 = value; break; + case UNW_ARM_R6: _msContext.R6 = value; break; + case UNW_ARM_R7: _msContext.R7 = value; break; + case UNW_ARM_R8: _msContext.R8 = value; break; + case UNW_ARM_R9: _msContext.R9 = value; break; + case UNW_ARM_R10: _msContext.R10 = value; break; + case UNW_ARM_R11: _msContext.R11 = value; break; + case UNW_ARM_R12: _msContext.R12 = value; break; + case UNW_REG_SP: + case UNW_ARM_SP: _msContext.Sp = value; break; + case UNW_ARM_LR: _msContext.Lr = value; break; + case UNW_REG_IP: + case UNW_ARM_IP: _msContext.Pc = value; break; +#elif defined(_LIBUNWIND_TARGET_AARCH64) + case UNW_REG_SP: _msContext.Sp = value; break; + case UNW_REG_IP: _msContext.Pc = value; break; + case UNW_ARM64_X0: + case UNW_ARM64_X1: + case UNW_ARM64_X2: + case UNW_ARM64_X3: + case UNW_ARM64_X4: + case UNW_ARM64_X5: + case UNW_ARM64_X6: + case UNW_ARM64_X7: + case UNW_ARM64_X8: + case UNW_ARM64_X9: + case UNW_ARM64_X10: + case UNW_ARM64_X11: + case UNW_ARM64_X12: + case UNW_ARM64_X13: + case UNW_ARM64_X14: + case UNW_ARM64_X15: + case UNW_ARM64_X16: + case UNW_ARM64_X17: + case UNW_ARM64_X18: + case UNW_ARM64_X19: + case UNW_ARM64_X20: + case UNW_ARM64_X21: + case UNW_ARM64_X22: + case UNW_ARM64_X23: + case UNW_ARM64_X24: + case UNW_ARM64_X25: + case UNW_ARM64_X26: + case UNW_ARM64_X27: + case UNW_ARM64_X28: + case UNW_ARM64_FP: + case UNW_ARM64_LR: 
_msContext.X[regNum - UNW_ARM64_X0] = value; break; +#endif + default: + _LIBUNWIND_ABORT("unsupported register"); + } +} + +template +bool UnwindCursor::validFloatReg(int regNum) { +#if defined(_LIBUNWIND_TARGET_ARM) + if (regNum >= UNW_ARM_S0 && regNum <= UNW_ARM_S31) return true; + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D31) return true; +#elif defined(_LIBUNWIND_TARGET_AARCH64) + if (regNum >= UNW_ARM64_D0 && regNum <= UNW_ARM64_D31) return true; +#else + (void)regNum; +#endif + return false; +} + +template +unw_fpreg_t UnwindCursor::getFloatReg(int regNum) { +#if defined(_LIBUNWIND_TARGET_ARM) + if (regNum >= UNW_ARM_S0 && regNum <= UNW_ARM_S31) { + union { + uint32_t w; + float f; + } d; + d.w = _msContext.S[regNum - UNW_ARM_S0]; + return d.f; + } + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D31) { + union { + uint64_t w; + double d; + } d; + d.w = _msContext.D[regNum - UNW_ARM_D0]; + return d.d; + } + _LIBUNWIND_ABORT("unsupported float register"); +#elif defined(_LIBUNWIND_TARGET_AARCH64) + return _msContext.V[regNum - UNW_ARM64_D0].D[0]; +#else + (void)regNum; + _LIBUNWIND_ABORT("float registers unimplemented"); +#endif +} + +template +void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { +#if defined(_LIBUNWIND_TARGET_ARM) + if (regNum >= UNW_ARM_S0 && regNum <= UNW_ARM_S31) { + union { + uint32_t w; + float f; + } d; + d.f = value; + _msContext.S[regNum - UNW_ARM_S0] = d.w; + } + if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D31) { + union { + uint64_t w; + double d; + } d; + d.d = value; + _msContext.D[regNum - UNW_ARM_D0] = d.w; + } + _LIBUNWIND_ABORT("unsupported float register"); +#elif defined(_LIBUNWIND_TARGET_AARCH64) + _msContext.V[regNum - UNW_ARM64_D0].D[0] = value; +#else + (void)regNum; + (void)value; + _LIBUNWIND_ABORT("float registers unimplemented"); +#endif +} + +template void UnwindCursor::jumpto() { + RtlRestoreContext(&_msContext, nullptr); +} + +#ifdef __arm__ +template void UnwindCursor::saveVFPAsX() {} +#endif + +template +const char *UnwindCursor::getRegisterName(int regNum) { + return R::getRegisterName(regNum); +} + +template bool UnwindCursor::isSignalFrame() { + return false; +} + +#else // !defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) || !defined(_WIN32) + +/// UnwindCursor contains all state (including all register values) during +/// an unwind. This is normally stack allocated inside a unw_cursor_t. +template +class UnwindCursor : public AbstractUnwindCursor{ + typedef typename A::pint_t pint_t; +public: + UnwindCursor(A &as); + UnwindCursor(unw_context_t *context, A &as); + UnwindCursor(A &as, void *threadArg); + virtual ~UnwindCursor() {} + virtual bool validReg(int); + virtual unw_word_t getReg(int); + virtual void setReg(int, unw_word_t, unw_word_t); + virtual unw_word_t getRegLocation(int); + virtual bool validFloatReg(int); + virtual unw_fpreg_t getFloatReg(int); + virtual void setFloatReg(int, unw_fpreg_t); + virtual int step(); + virtual void getInfo(unw_proc_info_t *); + virtual void jumpto(); + virtual bool isSignalFrame(); + virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); + virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); + virtual const char *getRegisterName(int num); +#ifdef __arm__ + virtual void saveVFPAsX(); +#endif + + // libunwind does not and should not depend on C++ library which means that we + // need our own defition of inline placement new. 
+ static void *operator new(size_t, UnwindCursor *p) { return p; } + +private: + +#if defined(_LIBUNWIND_ARM_EHABI) + bool getInfoFromEHABISection(pint_t pc, const UnwindInfoSections §s); + + int stepWithEHABI() { + size_t len = 0; + size_t off = 0; + // FIXME: Calling decode_eht_entry() here is violating the libunwind + // abstraction layer. + const uint32_t *ehtp = + decode_eht_entry(reinterpret_cast(_info.unwind_info), + &off, &len); + if (_Unwind_VRS_Interpret((_Unwind_Context *)this, ehtp, off, len) != + _URC_CONTINUE_UNWIND) + return UNW_STEP_END; + return UNW_STEP_SUCCESS; + } +#endif + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +public: + bool getInfoFromDwarfSection(pint_t pc, const UnwindInfoSections §s, + uint32_t fdeSectionOffsetHint=0); + int stepWithDwarfFDE() { + return DwarfInstructions::stepWithDwarf(_addressSpace, + (pint_t)this->getReg(UNW_REG_IP), + (pint_t)_info.unwind_info, + _registers); + } +#endif + +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + bool getInfoFromCompactEncodingSection(pint_t pc, + const UnwindInfoSections §s); + int stepWithCompactEncoding() { + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + if ( compactSaysUseDwarf() ) + return stepWithDwarfFDE(); + #endif + R dummy; + return stepWithCompactEncoding(dummy); + } + +#if defined(_LIBUNWIND_TARGET_X86_64) + int stepWithCompactEncoding(Registers_x86_64 &) { + return CompactUnwinder_x86_64::stepWithCompactEncoding( + _info.format, _info.start_ip, _addressSpace, _registers); + } +#endif + +#if defined(_LIBUNWIND_TARGET_I386) + int stepWithCompactEncoding(Registers_x86 &) { + return CompactUnwinder_x86::stepWithCompactEncoding( + _info.format, (uint32_t)_info.start_ip, _addressSpace, _registers); + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC) + int stepWithCompactEncoding(Registers_ppc &) { + return UNW_EINVAL; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC64) + int stepWithCompactEncoding(Registers_ppc64 &) { + return UNW_EINVAL; + } +#endif + + +#if defined(_LIBUNWIND_TARGET_AARCH64) + int stepWithCompactEncoding(Registers_arm64 &) { + return CompactUnwinder_arm64::stepWithCompactEncoding( + _info.format, _info.start_ip, _addressSpace, _registers); + } +#endif + +#if defined(_LIBUNWIND_TARGET_MIPS_O32) + int stepWithCompactEncoding(Registers_mips_o32 &) { + return UNW_EINVAL; + } +#endif + +#if defined(_LIBUNWIND_TARGET_MIPS_NEWABI) + int stepWithCompactEncoding(Registers_mips_newabi &) { + return UNW_EINVAL; + } +#endif + +#if defined(_LIBUNWIND_TARGET_SPARC) + int stepWithCompactEncoding(Registers_sparc &) { return UNW_EINVAL; } +#endif + + bool compactSaysUseDwarf(uint32_t *offset=NULL) const { + R dummy; + return compactSaysUseDwarf(dummy, offset); + } + +#if defined(_LIBUNWIND_TARGET_X86_64) + bool compactSaysUseDwarf(Registers_x86_64 &, uint32_t *offset) const { + if ((_info.format & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_DWARF) { + if (offset) + *offset = (_info.format & UNWIND_X86_64_DWARF_SECTION_OFFSET); + return true; + } + return false; + } +#endif + +#if defined(_LIBUNWIND_TARGET_I386) + bool compactSaysUseDwarf(Registers_x86 &, uint32_t *offset) const { + if ((_info.format & UNWIND_X86_MODE_MASK) == UNWIND_X86_MODE_DWARF) { + if (offset) + *offset = (_info.format & UNWIND_X86_DWARF_SECTION_OFFSET); + return true; + } + return false; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC) + bool compactSaysUseDwarf(Registers_ppc &, uint32_t *) const { + return true; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC64) + bool compactSaysUseDwarf(Registers_ppc64 &, uint32_t *) 
const { + return true; + } +#endif + +#if defined(_LIBUNWIND_TARGET_AARCH64) + bool compactSaysUseDwarf(Registers_arm64 &, uint32_t *offset) const { + if ((_info.format & UNWIND_ARM64_MODE_MASK) == UNWIND_ARM64_MODE_DWARF) { + if (offset) + *offset = (_info.format & UNWIND_ARM64_DWARF_SECTION_OFFSET); + return true; + } + return false; + } +#endif + +#if defined(_LIBUNWIND_TARGET_MIPS_O32) + bool compactSaysUseDwarf(Registers_mips_o32 &, uint32_t *) const { + return true; + } +#endif + +#if defined(_LIBUNWIND_TARGET_MIPS_NEWABI) + bool compactSaysUseDwarf(Registers_mips_newabi &, uint32_t *) const { + return true; + } +#endif + +#if defined(_LIBUNWIND_TARGET_SPARC) + bool compactSaysUseDwarf(Registers_sparc &, uint32_t *) const { return true; } +#endif + +#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + compact_unwind_encoding_t dwarfEncoding() const { + R dummy; + return dwarfEncoding(dummy); + } + +#if defined(_LIBUNWIND_TARGET_X86_64) + compact_unwind_encoding_t dwarfEncoding(Registers_x86_64 &) const { + return UNWIND_X86_64_MODE_DWARF; + } +#endif + +#if defined(_LIBUNWIND_TARGET_I386) + compact_unwind_encoding_t dwarfEncoding(Registers_x86 &) const { + return UNWIND_X86_MODE_DWARF; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC) + compact_unwind_encoding_t dwarfEncoding(Registers_ppc &) const { + return 0; + } +#endif + +#if defined(_LIBUNWIND_TARGET_PPC64) + compact_unwind_encoding_t dwarfEncoding(Registers_ppc64 &) const { + return 0; + } +#endif + +#if defined(_LIBUNWIND_TARGET_AARCH64) + compact_unwind_encoding_t dwarfEncoding(Registers_arm64 &) const { + return UNWIND_ARM64_MODE_DWARF; + } +#endif + +#if defined(_LIBUNWIND_TARGET_ARM) + compact_unwind_encoding_t dwarfEncoding(Registers_arm &) const { + return 0; + } +#endif + +#if defined (_LIBUNWIND_TARGET_OR1K) + compact_unwind_encoding_t dwarfEncoding(Registers_or1k &) const { + return 0; + } +#endif + +#if defined (_LIBUNWIND_TARGET_MIPS_O32) + compact_unwind_encoding_t dwarfEncoding(Registers_mips_o32 &) const { + return 0; + } +#endif + +#if defined (_LIBUNWIND_TARGET_MIPS_NEWABI) + compact_unwind_encoding_t dwarfEncoding(Registers_mips_newabi &) const { + return 0; + } +#endif + +#if defined(_LIBUNWIND_TARGET_SPARC) + compact_unwind_encoding_t dwarfEncoding(Registers_sparc &) const { return 0; } +#endif + +#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + // For runtime environments using SEH unwind data without Windows runtime + // support. 
+ pint_t getLastPC() const { /* FIXME: Implement */ return 0; } + void setLastPC(pint_t pc) { /* FIXME: Implement */ } + RUNTIME_FUNCTION *lookUpSEHUnwindInfo(pint_t pc, pint_t *base) { + /* FIXME: Implement */ + *base = 0; + return nullptr; + } + bool getInfoFromSEH(pint_t pc); + int stepWithSEHData() { /* FIXME: Implement */ return 0; } +#endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + + + A &_addressSpace; + R _registers; + unw_proc_info_t _info; + bool _unwindInfoMissing; + bool _isSignalFrame; +}; + +template +UnwindCursor::UnwindCursor(A &as) + : _addressSpace(as) + , _unwindInfoMissing(false) + , _isSignalFrame(false) { + memset(&_info, 0, sizeof(_info)); +} + +template +UnwindCursor::UnwindCursor(unw_context_t *context, A &as) + : _addressSpace(as), _registers(context), _unwindInfoMissing(false), + _isSignalFrame(false) { + static_assert((check_fit, unw_cursor_t>::does_fit), + "UnwindCursor<> does not fit in unw_cursor_t"); + memset(&_info, 0, sizeof(_info)); +} + +template +UnwindCursor::UnwindCursor(A &as, void *arg) + : _addressSpace(as),_registers(arg), _unwindInfoMissing(false), + _isSignalFrame(false) { + memset(&_info, 0, sizeof(_info)); + + // FIXME + // fill in _registers from thread arg +} + + +template +bool UnwindCursor::validReg(int regNum) { + return _registers.validRegister(regNum); +} + +template +unw_word_t UnwindCursor::getReg(int regNum) { + return _registers.getRegister(regNum); +} + +template +void UnwindCursor::setReg(int regNum, unw_word_t value, unw_word_t location) { + _registers.setRegister(regNum, (typename A::pint_t)value, (typename A::pint_t)location); +} + +template +unw_word_t UnwindCursor::getRegLocation(int regNum) { + return _registers.getRegisterLocation(regNum); +} + +template +bool UnwindCursor::validFloatReg(int regNum) { + return _registers.validFloatRegister(regNum); +} + +template +unw_fpreg_t UnwindCursor::getFloatReg(int regNum) { + return _registers.getFloatRegister(regNum); +} + +template +void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { + _registers.setFloatRegister(regNum, value); +} + +template void UnwindCursor::jumpto() { + _registers.jumpto(); +} + +#ifdef __arm__ +template void UnwindCursor::saveVFPAsX() { + _registers.saveVFPAsX(); +} +#endif + +template +const char *UnwindCursor::getRegisterName(int regNum) { + return _registers.getRegisterName(regNum); +} + +template bool UnwindCursor::isSignalFrame() { + return _isSignalFrame; +} + +#endif // defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + +#if defined(_LIBUNWIND_ARM_EHABI) +struct EHABIIndexEntry { + uint32_t functionOffset; + uint32_t data; +}; + +template +struct EHABISectionIterator { + typedef EHABISectionIterator _Self; + + typedef typename A::pint_t value_type; + typedef typename A::pint_t* pointer; + typedef typename A::pint_t& reference; + typedef size_t size_type; + typedef size_t difference_type; + + static _Self begin(A& addressSpace, const UnwindInfoSections& sects) { + return _Self(addressSpace, sects, 0); + } + static _Self end(A& addressSpace, const UnwindInfoSections& sects) { + return _Self(addressSpace, sects, + sects.arm_section_length / sizeof(EHABIIndexEntry)); + } + + EHABISectionIterator(A& addressSpace, const UnwindInfoSections& sects, size_t i) + : _i(i), _addressSpace(&addressSpace), _sects(§s) {} + + _Self& operator++() { ++_i; return *this; } + _Self& operator+=(size_t a) { _i += a; return *this; } + _Self& operator--() { assert(_i > 0); --_i; return *this; } + _Self& operator-=(size_t a) { assert(_i >= a); _i -= a; return *this; } + + 
_Self operator+(size_t a) { _Self out = *this; out._i += a; return out; } + _Self operator-(size_t a) { assert(_i >= a); _Self out = *this; out._i -= a; return out; } + + size_t operator-(const _Self& other) { return _i - other._i; } + + bool operator==(const _Self& other) const { + assert(_addressSpace == other._addressSpace); + assert(_sects == other._sects); + return _i == other._i; + } + + typename A::pint_t operator*() const { return functionAddress(); } + + typename A::pint_t functionAddress() const { + typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof( + EHABIIndexEntry, _i, functionOffset); + return indexAddr + signExtendPrel31(_addressSpace->get32(indexAddr)); + } + + typename A::pint_t dataAddress() { + typename A::pint_t indexAddr = _sects->arm_section + arrayoffsetof( + EHABIIndexEntry, _i, data); + return indexAddr; + } + + private: + size_t _i; + A* _addressSpace; + const UnwindInfoSections* _sects; +}; + +namespace { + +template +EHABISectionIterator EHABISectionUpperBound( + EHABISectionIterator first, + EHABISectionIterator last, + typename A::pint_t value) { + size_t len = last - first; + while (len > 0) { + size_t l2 = len / 2; + EHABISectionIterator m = first + l2; + if (value < *m) { + len = l2; + } else { + first = ++m; + len -= l2 + 1; + } + } + return first; +} + +} + +template +bool UnwindCursor::getInfoFromEHABISection( + pint_t pc, + const UnwindInfoSections §s) { + EHABISectionIterator begin = + EHABISectionIterator::begin(_addressSpace, sects); + EHABISectionIterator end = + EHABISectionIterator::end(_addressSpace, sects); + if (begin == end) + return false; + + EHABISectionIterator itNextPC = EHABISectionUpperBound(begin, end, pc); + if (itNextPC == begin) + return false; + EHABISectionIterator itThisPC = itNextPC - 1; + + pint_t thisPC = itThisPC.functionAddress(); + // If an exception is thrown from a function, corresponding to the last entry + // in the table, we don't really know the function extent and have to choose a + // value for nextPC. Choosing max() will allow the range check during trace to + // succeed. + pint_t nextPC = (itNextPC == end) ? UINTPTR_MAX : itNextPC.functionAddress(); + pint_t indexDataAddr = itThisPC.dataAddress(); + + if (indexDataAddr == 0) + return false; + + uint32_t indexData = _addressSpace.get32(indexDataAddr); + if (indexData == UNW_EXIDX_CANTUNWIND) + return false; + + // If the high bit is set, the exception handling table entry is inline inside + // the index table entry on the second word (aka |indexDataAddr|). Otherwise, + // the table points at an offset in the exception handling table (section 5 EHABI). + pint_t exceptionTableAddr; + uint32_t exceptionTableData; + bool isSingleWordEHT; + if (indexData & 0x80000000) { + exceptionTableAddr = indexDataAddr; + // TODO(ajwong): Should this data be 0? + exceptionTableData = indexData; + isSingleWordEHT = true; + } else { + exceptionTableAddr = indexDataAddr + signExtendPrel31(indexData); + exceptionTableData = _addressSpace.get32(exceptionTableAddr); + isSingleWordEHT = false; + } + + // Now we know the 3 things: + // exceptionTableAddr -- exception handler table entry. + // exceptionTableData -- the data inside the first word of the eht entry. + // isSingleWordEHT -- whether the entry is in the index. + unw_word_t personalityRoutine = 0xbadf00d; + bool scope32 = false; + uintptr_t lsda; + + // If the high bit in the exception handling table entry is set, the entry is + // in compact form (section 6.3 EHABI). 
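  // (Compact-model layout, matching the decoding below: bit 31 is the compact
  //  flag, bits 27..24 select the AEABI personality routine (0/1/2 map to
  //  __aeabi_unwind_cpp_pr0/1/2), and for pr1/pr2 bits 23..16 give the number of
  //  additional 32-bit opcode words that precede the LSDA.)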
+ if (exceptionTableData & 0x80000000) { + // Grab the index of the personality routine from the compact form. + uint32_t choice = (exceptionTableData & 0x0f000000) >> 24; + uint32_t extraWords = 0; + switch (choice) { + case 0: + personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr0; + extraWords = 0; + scope32 = false; + lsda = isSingleWordEHT ? 0 : (exceptionTableAddr + 4); + break; + case 1: + personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr1; + extraWords = (exceptionTableData & 0x00ff0000) >> 16; + scope32 = false; + lsda = exceptionTableAddr + (extraWords + 1) * 4; + break; + case 2: + personalityRoutine = (unw_word_t) &__aeabi_unwind_cpp_pr2; + extraWords = (exceptionTableData & 0x00ff0000) >> 16; + scope32 = true; + lsda = exceptionTableAddr + (extraWords + 1) * 4; + break; + default: + _LIBUNWIND_ABORT("unknown personality routine"); + return false; + } + + if (isSingleWordEHT) { + if (extraWords != 0) { + _LIBUNWIND_ABORT("index inlined table detected but pr function " + "requires extra words"); + return false; + } + } + } else { + pint_t personalityAddr = + exceptionTableAddr + signExtendPrel31(exceptionTableData); + personalityRoutine = personalityAddr; + + // ARM EHABI # 6.2, # 9.2 + // + // +---- ehtp + // v + // +--------------------------------------+ + // | +--------+--------+--------+-------+ | + // | |0| prel31 to personalityRoutine | | + // | +--------+--------+--------+-------+ | + // | | N | unwind opcodes | | <-- UnwindData + // | +--------+--------+--------+-------+ | + // | | Word 2 unwind opcodes | | + // | +--------+--------+--------+-------+ | + // | ... | + // | +--------+--------+--------+-------+ | + // | | Word N unwind opcodes | | + // | +--------+--------+--------+-------+ | + // | | LSDA | | <-- lsda + // | | ... | | + // | +--------+--------+--------+-------+ | + // +--------------------------------------+ + + uint32_t *UnwindData = reinterpret_cast(exceptionTableAddr) + 1; + uint32_t FirstDataWord = *UnwindData; + size_t N = ((FirstDataWord >> 24) & 0xff); + size_t NDataWords = N + 1; + lsda = reinterpret_cast(UnwindData + NDataWords); + } + + _info.start_ip = thisPC; + _info.end_ip = nextPC; + _info.handler = personalityRoutine; + _info.unwind_info = exceptionTableAddr; + _info.lsda = lsda; + // flags is pr_cache.additional. See EHABI #7.2 for definition of bit 0. + _info.flags = (isSingleWordEHT ? 1 : 0) | (scope32 ? 0x2 : 0); // Use enum? + + return true; +} +#endif + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +template +bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, + const UnwindInfoSections §s, + uint32_t fdeSectionOffsetHint) { + typename CFI_Parser::FDE_Info fdeInfo; + typename CFI_Parser::CIE_Info cieInfo; + bool foundFDE = false; + bool foundInCache = false; + // If compact encoding table gave offset into dwarf section, go directly there + if (fdeSectionOffsetHint != 0) { + foundFDE = CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, + (uint32_t)sects.dwarf_section_length, + sects.dwarf_section + fdeSectionOffsetHint, + &fdeInfo, &cieInfo); + } +#if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) + if (!foundFDE && (sects.dwarf_index_section != 0)) { + foundFDE = EHHeaderParser::findFDE( + _addressSpace, pc, sects.dwarf_index_section, + (uint32_t)sects.dwarf_index_section_length, &fdeInfo, &cieInfo); + } +#endif + if (!foundFDE) { + // otherwise, search cache of previously found FDEs. 
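  // (Overall lookup order in this function: the offset hint from the
  //  compact-unwind entry, then the .eh_frame_hdr search table when
  //  _LIBUNWIND_SUPPORT_DWARF_INDEX is enabled, then the process-wide
  //  DwarfFDECache, and finally a linear scan of the whole .eh_frame section,
  //  whose result is cached for the next lookup when no search index was
  //  available.)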
+ pint_t cachedFDE = DwarfFDECache::findFDE(sects.dso_base, pc); + if (cachedFDE != 0) { + foundFDE = + CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, + (uint32_t)sects.dwarf_section_length, + cachedFDE, &fdeInfo, &cieInfo); + foundInCache = foundFDE; + } + } + if (!foundFDE) { + // Still not found, do full scan of __eh_frame section. + foundFDE = CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, + (uint32_t)sects.dwarf_section_length, 0, + &fdeInfo, &cieInfo); + } + if (foundFDE) { + typename CFI_Parser::PrologInfo prolog; + if (CFI_Parser::parseFDEInstructions(_addressSpace, fdeInfo, cieInfo, pc, + R::getArch(), &prolog)) { + // Save off parsed FDE info + _info.start_ip = fdeInfo.pcStart; + _info.end_ip = fdeInfo.pcEnd; + _info.lsda = fdeInfo.lsda; + _info.handler = cieInfo.personality; + _info.gp = prolog.spExtraArgSize; + _info.flags = 0; + _info.format = dwarfEncoding(); + _info.unwind_info = fdeInfo.fdeStart; + _info.unwind_info_size = (uint32_t)fdeInfo.fdeLength; + _info.extra = (unw_word_t) sects.dso_base; + + // Add to cache (to make next lookup faster) if we had no hint + // and there was no index. + if (!foundInCache && (fdeSectionOffsetHint == 0)) { + #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) + if (sects.dwarf_index_section == 0) + #endif + DwarfFDECache::add(sects.dso_base, fdeInfo.pcStart, fdeInfo.pcEnd, + fdeInfo.fdeStart); + } + return true; + } + } + //_LIBUNWIND_DEBUG_LOG("can't find/use FDE for pc=0x%llX", (uint64_t)pc); + return false; +} +#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + + +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) +template +bool UnwindCursor::getInfoFromCompactEncodingSection(pint_t pc, + const UnwindInfoSections §s) { + const bool log = false; + if (log) + fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX, mh=0x%llX)\n", + (uint64_t)pc, (uint64_t)sects.dso_base); + + const UnwindSectionHeader sectionHeader(_addressSpace, + sects.compact_unwind_section); + if (sectionHeader.version() != UNWIND_SECTION_VERSION) + return false; + + // do a binary search of top level index to find page with unwind info + pint_t targetFunctionOffset = pc - sects.dso_base; + const UnwindSectionIndexArray topIndex(_addressSpace, + sects.compact_unwind_section + + sectionHeader.indexSectionOffset()); + uint32_t low = 0; + uint32_t high = sectionHeader.indexCount(); + uint32_t last = high - 1; + while (low < high) { + uint32_t mid = (low + high) / 2; + //if ( log ) fprintf(stderr, "\tmid=%d, low=%d, high=%d, *mid=0x%08X\n", + //mid, low, high, topIndex.functionOffset(mid)); + if (topIndex.functionOffset(mid) <= targetFunctionOffset) { + if ((mid == last) || + (topIndex.functionOffset(mid + 1) > targetFunctionOffset)) { + low = mid; + break; + } else { + low = mid + 1; + } + } else { + high = mid; + } + } + const uint32_t firstLevelFunctionOffset = topIndex.functionOffset(low); + const uint32_t firstLevelNextPageFunctionOffset = + topIndex.functionOffset(low + 1); + const pint_t secondLevelAddr = + sects.compact_unwind_section + topIndex.secondLevelPagesSectionOffset(low); + const pint_t lsdaArrayStartAddr = + sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low); + const pint_t lsdaArrayEndAddr = + sects.compact_unwind_section + topIndex.lsdaIndexArraySectionOffset(low+1); + if (log) + fprintf(stderr, "\tfirst level search for result index=%d " + "to secondLevelAddr=0x%llX\n", + low, (uint64_t) secondLevelAddr); + // do a binary search of second level page index + uint32_t encoding = 0; + pint_t funcStart = 0; + 
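  // Editorial note: the locals declared here are filled in from whichever
  // second-level page kind matches below -- REGULAR pages store an explicit
  // (functionOffset, encoding) pair per entry, while COMPRESSED pages pack a
  // 24-bit page-relative function offset together with an 8-bit index into either
  // the section-wide common-encodings table or a page-local encodings table.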
pint_t funcEnd = 0; + pint_t lsda = 0; + pint_t personality = 0; + uint32_t pageKind = _addressSpace.get32(secondLevelAddr); + if (pageKind == UNWIND_SECOND_LEVEL_REGULAR) { + // regular page + UnwindSectionRegularPageHeader pageHeader(_addressSpace, + secondLevelAddr); + UnwindSectionRegularArray pageIndex( + _addressSpace, secondLevelAddr + pageHeader.entryPageOffset()); + // binary search looks for entry with e where index[e].offset <= pc < + // index[e+1].offset + if (log) + fprintf(stderr, "\tbinary search for targetFunctionOffset=0x%08llX in " + "regular page starting at secondLevelAddr=0x%llX\n", + (uint64_t) targetFunctionOffset, (uint64_t) secondLevelAddr); + low = 0; + high = pageHeader.entryCount(); + while (low < high) { + uint32_t mid = (low + high) / 2; + if (pageIndex.functionOffset(mid) <= targetFunctionOffset) { + if (mid == (uint32_t)(pageHeader.entryCount() - 1)) { + // at end of table + low = mid; + funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base; + break; + } else if (pageIndex.functionOffset(mid + 1) > targetFunctionOffset) { + // next is too big, so we found it + low = mid; + funcEnd = pageIndex.functionOffset(low + 1) + sects.dso_base; + break; + } else { + low = mid + 1; + } + } else { + high = mid; + } + } + encoding = pageIndex.encoding(low); + funcStart = pageIndex.functionOffset(low) + sects.dso_base; + if (pc < funcStart) { + if (log) + fprintf( + stderr, + "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n", + (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd); + return false; + } + if (pc > funcEnd) { + if (log) + fprintf( + stderr, + "\tpc not in table, pc=0x%llX, funcStart=0x%llX, funcEnd=0x%llX\n", + (uint64_t) pc, (uint64_t) funcStart, (uint64_t) funcEnd); + return false; + } + } else if (pageKind == UNWIND_SECOND_LEVEL_COMPRESSED) { + // compressed page + UnwindSectionCompressedPageHeader pageHeader(_addressSpace, + secondLevelAddr); + UnwindSectionCompressedArray pageIndex( + _addressSpace, secondLevelAddr + pageHeader.entryPageOffset()); + const uint32_t targetFunctionPageOffset = + (uint32_t)(targetFunctionOffset - firstLevelFunctionOffset); + // binary search looks for entry with e where index[e].offset <= pc < + // index[e+1].offset + if (log) + fprintf(stderr, "\tbinary search of compressed page starting at " + "secondLevelAddr=0x%llX\n", + (uint64_t) secondLevelAddr); + low = 0; + last = pageHeader.entryCount() - 1; + high = pageHeader.entryCount(); + while (low < high) { + uint32_t mid = (low + high) / 2; + if (pageIndex.functionOffset(mid) <= targetFunctionPageOffset) { + if ((mid == last) || + (pageIndex.functionOffset(mid + 1) > targetFunctionPageOffset)) { + low = mid; + break; + } else { + low = mid + 1; + } + } else { + high = mid; + } + } + funcStart = pageIndex.functionOffset(low) + firstLevelFunctionOffset + + sects.dso_base; + if (low < last) + funcEnd = + pageIndex.functionOffset(low + 1) + firstLevelFunctionOffset + + sects.dso_base; + else + funcEnd = firstLevelNextPageFunctionOffset + sects.dso_base; + if (pc < funcStart) { + _LIBUNWIND_DEBUG_LOG("malformed __unwind_info, pc=0x%llX not in second " + "level compressed unwind table. funcStart=0x%llX", + (uint64_t) pc, (uint64_t) funcStart); + return false; + } + if (pc > funcEnd) { + _LIBUNWIND_DEBUG_LOG("malformed __unwind_info, pc=0x%llX not in second " + "level compressed unwind table. 
funcEnd=0x%llX", + (uint64_t) pc, (uint64_t) funcEnd); + return false; + } + uint16_t encodingIndex = pageIndex.encodingIndex(low); + if (encodingIndex < sectionHeader.commonEncodingsArrayCount()) { + // encoding is in common table in section header + encoding = _addressSpace.get32( + sects.compact_unwind_section + + sectionHeader.commonEncodingsArraySectionOffset() + + encodingIndex * sizeof(uint32_t)); + } else { + // encoding is in page specific table + uint16_t pageEncodingIndex = + encodingIndex - (uint16_t)sectionHeader.commonEncodingsArrayCount(); + encoding = _addressSpace.get32(secondLevelAddr + + pageHeader.encodingsPageOffset() + + pageEncodingIndex * sizeof(uint32_t)); + } + } else { + _LIBUNWIND_DEBUG_LOG("malformed __unwind_info at 0x%0llX bad second " + "level page", + (uint64_t) sects.compact_unwind_section); + return false; + } + + // look up LSDA, if encoding says function has one + if (encoding & UNWIND_HAS_LSDA) { + UnwindSectionLsdaArray lsdaIndex(_addressSpace, lsdaArrayStartAddr); + uint32_t funcStartOffset = (uint32_t)(funcStart - sects.dso_base); + low = 0; + high = (uint32_t)(lsdaArrayEndAddr - lsdaArrayStartAddr) / + sizeof(unwind_info_section_header_lsda_index_entry); + // binary search looks for entry with exact match for functionOffset + if (log) + fprintf(stderr, + "\tbinary search of lsda table for targetFunctionOffset=0x%08X\n", + funcStartOffset); + while (low < high) { + uint32_t mid = (low + high) / 2; + if (lsdaIndex.functionOffset(mid) == funcStartOffset) { + lsda = lsdaIndex.lsdaOffset(mid) + sects.dso_base; + break; + } else if (lsdaIndex.functionOffset(mid) < funcStartOffset) { + low = mid + 1; + } else { + high = mid; + } + } + if (lsda == 0) { + _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with HAS_LSDA bit set for " + "pc=0x%0llX, but lsda table has no entry", + encoding, (uint64_t) pc); + return false; + } + } + + // extact personality routine, if encoding says function has one + uint32_t personalityIndex = (encoding & UNWIND_PERSONALITY_MASK) >> + (__builtin_ctz(UNWIND_PERSONALITY_MASK)); + if (personalityIndex != 0) { + --personalityIndex; // change 1-based to zero-based index + if (personalityIndex > sectionHeader.personalityArrayCount()) { + _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with personality index %d, " + "but personality table has only %d entries", + encoding, personalityIndex, + sectionHeader.personalityArrayCount()); + return false; + } + int32_t personalityDelta = (int32_t)_addressSpace.get32( + sects.compact_unwind_section + + sectionHeader.personalityArraySectionOffset() + + personalityIndex * sizeof(uint32_t)); + pint_t personalityPointer = sects.dso_base + (pint_t)personalityDelta; + personality = _addressSpace.getP(personalityPointer); + if (log) + fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), " + "personalityDelta=0x%08X, personality=0x%08llX\n", + (uint64_t) pc, personalityDelta, (uint64_t) personality); + } + + if (log) + fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), " + "encoding=0x%08X, lsda=0x%08llX for funcStart=0x%llX\n", + (uint64_t) pc, encoding, (uint64_t) lsda, (uint64_t) funcStart); + _info.start_ip = funcStart; + _info.end_ip = funcEnd; + _info.lsda = lsda; + _info.handler = personality; + _info.gp = 0; + _info.flags = 0; + _info.format = encoding; + _info.unwind_info = 0; + _info.unwind_info_size = 0; + _info.extra = sects.dso_base; + return true; +} +#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) +template +bool 
UnwindCursor::getInfoFromSEH(pint_t pc) { + pint_t base; + RUNTIME_FUNCTION *unwindEntry = lookUpSEHUnwindInfo(pc, &base); + if (!unwindEntry) { + _LIBUNWIND_DEBUG_LOG("\tpc not in table, pc=0x%llX", (uint64_t) pc); + return false; + } + _info.gp = 0; + _info.flags = 0; + _info.format = 0; + _info.unwind_info_size = sizeof(RUNTIME_FUNCTION); + _info.unwind_info = reinterpret_cast(unwindEntry); + _info.extra = base; + _info.start_ip = base + unwindEntry->BeginAddress; +#ifdef _LIBUNWIND_TARGET_X86_64 + _info.end_ip = base + unwindEntry->EndAddress; + // Only fill in the handler and LSDA if they're stale. + if (pc != getLastPC()) { + UNWIND_INFO *xdata = reinterpret_cast(base + unwindEntry->UnwindData); + if (xdata->Flags & (UNW_FLAG_EHANDLER|UNW_FLAG_UHANDLER)) { + // The personality is given in the UNWIND_INFO itself. The LSDA immediately + // follows the UNWIND_INFO. (This follows how both Clang and MSVC emit + // these structures.) + // N.B. UNWIND_INFO structs are DWORD-aligned. + uint32_t lastcode = (xdata->CountOfCodes + 1) & ~1; + const uint32_t *handler = reinterpret_cast(&xdata->UnwindCodes[lastcode]); + _info.lsda = reinterpret_cast(handler+1); + if (*handler) { + _info.handler = reinterpret_cast(__libunwind_seh_personality); + } else + _info.handler = 0; + } else { + _info.lsda = 0; + _info.handler = 0; + } + } +#elif defined(_LIBUNWIND_TARGET_ARM) + _info.end_ip = _info.start_ip + unwindEntry->FunctionLength; + _info.lsda = 0; // FIXME + _info.handler = 0; // FIXME +#endif + setLastPC(pc); + return true; +} +#endif + + +template +void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { + pint_t pc = (pint_t)this->getReg(UNW_REG_IP); +#if defined(_LIBUNWIND_ARM_EHABI) + // Remove the thumb bit so the IP represents the actual instruction address. + // This matches the behaviour of _Unwind_GetIP on arm. + pc &= (pint_t)~0x1; +#endif + + // If the last line of a function is a "throw" the compiler sometimes + // emits no instructions after the call to __cxa_throw. This means + // the return address is actually the start of the next function. + // To disambiguate this, back up the pc when we know it is a return + // address. + if (isReturnAddress) + --pc; + + // Ask address space object to find unwind sections for this pc. + UnwindInfoSections sects; + if (_addressSpace.findUnwindSections(pc, sects)) { +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + // If there is a compact unwind encoding table, look there first. + if (sects.compact_unwind_section != 0) { + if (this->getInfoFromCompactEncodingSection(pc, sects)) { + #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + // Found info in table, done unless encoding says to use dwarf. + uint32_t dwarfOffset; + if ((sects.dwarf_section != 0) && compactSaysUseDwarf(&dwarfOffset)) { + if (this->getInfoFromDwarfSection(pc, sects, dwarfOffset)) { + // found info in dwarf, done + return; + } + } + #endif + // If unwind table has entry, but entry says there is no unwind info, + // record that we have no unwind info. + if (_info.format == 0) + _unwindInfoMissing = true; + return; + } + } +#endif // defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + // If there is SEH unwind info, look there next. + if (this->getInfoFromSEH(pc)) + return; +#endif + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + // If there is dwarf unwind info, look there next. 
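  // Editorial note: the overall probe order in this function is compact unwind,
  // then SEH, then DWARF, then ARM EHABI; the cheaper, fully indexed formats are
  // consulted before anything that may require scanning .eh_frame.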
+ if (sects.dwarf_section != 0) { + if (this->getInfoFromDwarfSection(pc, sects)) { + // found info in dwarf, done + return; + } + } +#endif + +#if defined(_LIBUNWIND_ARM_EHABI) + // If there is ARM EHABI unwind info, look there next. + if (sects.arm_section != 0 && this->getInfoFromEHABISection(pc, sects)) + return; +#endif + } + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + // There is no static unwind info for this pc. Look to see if an FDE was + // dynamically registered for it. + pint_t cachedFDE = DwarfFDECache::findFDE(0, pc); + if (cachedFDE != 0) { + CFI_Parser::FDE_Info fdeInfo; + CFI_Parser::CIE_Info cieInfo; + const char *msg = CFI_Parser::decodeFDE(_addressSpace, + cachedFDE, &fdeInfo, &cieInfo); + if (msg == NULL) { + typename CFI_Parser::PrologInfo prolog; + if (CFI_Parser::parseFDEInstructions(_addressSpace, fdeInfo, cieInfo, + pc, R::getArch(), &prolog)) { + // save off parsed FDE info + _info.start_ip = fdeInfo.pcStart; + _info.end_ip = fdeInfo.pcEnd; + _info.lsda = fdeInfo.lsda; + _info.handler = cieInfo.personality; + _info.gp = prolog.spExtraArgSize; + // Some frameless functions need SP + // altered when resuming in function. + _info.flags = 0; + _info.format = dwarfEncoding(); + _info.unwind_info = fdeInfo.fdeStart; + _info.unwind_info_size = (uint32_t)fdeInfo.fdeLength; + _info.extra = 0; + return; + } + } + } + + // Lastly, ask AddressSpace object about platform specific ways to locate + // other FDEs. + pint_t fde; + if (_addressSpace.findOtherFDE(pc, fde)) { + CFI_Parser::FDE_Info fdeInfo; + CFI_Parser::CIE_Info cieInfo; + if (!CFI_Parser::decodeFDE(_addressSpace, fde, &fdeInfo, &cieInfo)) { + // Double check this FDE is for a function that includes the pc. + if ((fdeInfo.pcStart <= pc) && (pc < fdeInfo.pcEnd)) { + typename CFI_Parser::PrologInfo prolog; + if (CFI_Parser::parseFDEInstructions(_addressSpace, fdeInfo, cieInfo, + pc, R::getArch(), &prolog)) { + // save off parsed FDE info + _info.start_ip = fdeInfo.pcStart; + _info.end_ip = fdeInfo.pcEnd; + _info.lsda = fdeInfo.lsda; + _info.handler = cieInfo.personality; + _info.gp = prolog.spExtraArgSize; + _info.flags = 0; + _info.format = dwarfEncoding(); + _info.unwind_info = fdeInfo.fdeStart; + _info.unwind_info_size = (uint32_t)fdeInfo.fdeLength; + _info.extra = 0; + return; + } + } + } + } +#endif // #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + + // no unwind info, flag that we can't reliably unwind + _unwindInfoMissing = true; +} + +template +int UnwindCursor::step() { + // Bottom of stack is defined is when unwind info cannot be found. + if (_unwindInfoMissing) + return UNW_STEP_END; + + // Use unwinding info to modify register set as if function returned. 
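  // Editorial note: a minimal sketch of how the public API reaches this point
  // (assuming the usual libunwind.h entry points):
  //   unw_context_t ctx;  unw_cursor_t cursor;
  //   unw_getcontext(&ctx);
  //   unw_init_local(&cursor, &ctx);
  //   while (unw_step(&cursor) > 0) {
  //     unw_word_t ip;
  //     unw_get_reg(&cursor, UNW_REG_IP, &ip);   // inspect the current frame
  //   }
  // Each unw_step() call lands here and dispatches to exactly one of the
  // format-specific steppers selected by the build configuration below.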
+ int result; +#if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) + result = this->stepWithCompactEncoding(); +#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) + result = this->stepWithSEHData(); +#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + result = this->stepWithDwarfFDE(); +#elif defined(_LIBUNWIND_ARM_EHABI) + result = this->stepWithEHABI(); +#else + #error Need _LIBUNWIND_SUPPORT_COMPACT_UNWIND or \ + _LIBUNWIND_SUPPORT_SEH_UNWIND or \ + _LIBUNWIND_SUPPORT_DWARF_UNWIND or \ + _LIBUNWIND_ARM_EHABI +#endif + + // update info based on new PC + if (result == UNW_STEP_SUCCESS) { + this->setInfoBasedOnIPRegister(true); + if (_unwindInfoMissing) + return UNW_STEP_END; + } + + return result; +} + +template +void UnwindCursor::getInfo(unw_proc_info_t *info) { + *info = _info; +} + +template +bool UnwindCursor::getFunctionName(char *buf, size_t bufLen, + unw_word_t *offset) { + return _addressSpace.findFunctionName((pint_t)this->getReg(UNW_REG_IP), + buf, bufLen, offset); +} + +} // namespace libunwind + +#endif // __UNWINDCURSOR_HPP__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1-gcc-ext.c b/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1-gcc-ext.c new file mode 100644 index 0000000000000..63e4083a45794 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1-gcc-ext.c @@ -0,0 +1,319 @@ +//===--------------------- UnwindLevel1-gcc-ext.c -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Implements gcc extensions to the C++ ABI Exception Handling Level 1. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "libunwind_ext.h" +#include "libunwind.h" +#include "Unwind-EHABI.h" +#include "unwind.h" + +#if defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) + +#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) +#define private_1 private_[0] +#endif + +/// Called by __cxa_rethrow(). +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object) { +#if defined(_LIBUNWIND_ARM_EHABI) + _LIBUNWIND_TRACE_API("_Unwind_Resume_or_Rethrow(ex_obj=%p), private_1=%ld", + (void *)exception_object, + (long)exception_object->unwinder_cache.reserved1); +#else + _LIBUNWIND_TRACE_API("_Unwind_Resume_or_Rethrow(ex_obj=%p), private_1=%" PRIdPTR, + (void *)exception_object, + (intptr_t)exception_object->private_1); +#endif + +#if defined(_LIBUNWIND_ARM_EHABI) + // _Unwind_RaiseException on EHABI will always set the reserved1 field to 0, + // which is in the same position as private_1 below. + return _Unwind_RaiseException(exception_object); +#else + // If this is non-forced and a stopping place was found, then this is a + // re-throw. + // Call _Unwind_RaiseException() as if this was a new exception + if (exception_object->private_1 == 0) { + return _Unwind_RaiseException(exception_object); + // Will return if there is no catch clause, so that __cxa_rethrow can call + // std::terminate(). + } + + // Call through to _Unwind_Resume() which distiguishes between forced and + // regular exceptions. 
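  // Editorial note: private_1 doubles as the forced-unwind marker.
  // _Unwind_RaiseException() zeroes it, while _Unwind_ForcedUnwind() stores the
  // stop function pointer there, so a non-zero value here means "resume the
  // forced unwind" rather than "treat this as a fresh throw".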
+ _Unwind_Resume(exception_object); + _LIBUNWIND_ABORT("_Unwind_Resume_or_Rethrow() called _Unwind_RaiseException()" + " which unexpectedly returned"); +#endif +} + + +/// Called by personality handler during phase 2 to get base address for data +/// relative encodings. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetDataRelBase(struct _Unwind_Context *context) { + (void)context; + _LIBUNWIND_TRACE_API("_Unwind_GetDataRelBase(context=%p)", (void *)context); + _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented"); +} + + +/// Called by personality handler during phase 2 to get base address for text +/// relative encodings. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetTextRelBase(struct _Unwind_Context *context) { + (void)context; + _LIBUNWIND_TRACE_API("_Unwind_GetTextRelBase(context=%p)", (void *)context); + _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented"); +} + + +/// Scans unwind information to find the function that contains the +/// specified code address "pc". +_LIBUNWIND_EXPORT void *_Unwind_FindEnclosingFunction(void *pc) { + _LIBUNWIND_TRACE_API("_Unwind_FindEnclosingFunction(pc=%p)", pc); + // This is slow, but works. + // We create an unwind cursor then alter the IP to be pc + unw_cursor_t cursor; + unw_context_t uc; + unw_proc_info_t info; + __unw_getcontext(&uc); + __unw_init_local(&cursor, &uc); + __unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t)pc); + if (__unw_get_proc_info(&cursor, &info) == UNW_ESUCCESS) + return (void *)(intptr_t) info.start_ip; + else + return NULL; +} + +/// Walk every frame and call trace function at each one. If trace function +/// returns anything other than _URC_NO_REASON, then walk is terminated. +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) { + unw_cursor_t cursor; + unw_context_t uc; + __unw_getcontext(&uc); + __unw_init_local(&cursor, &uc); + + _LIBUNWIND_TRACE_API("_Unwind_Backtrace(callback=%p)", + (void *)(uintptr_t)callback); + +#if defined(_LIBUNWIND_ARM_EHABI) + // Create a mock exception object for force unwinding. + _Unwind_Exception ex; + memset(&ex, '\0', sizeof(ex)); + ex.exception_class = 0x434C4E47554E5700; // CLNGUNW\0 +#endif + + // walk each frame + while (true) { + _Unwind_Reason_Code result; + +#if !defined(_LIBUNWIND_ARM_EHABI) + // ask libunwind to get next frame (skip over first frame which is + // _Unwind_Backtrace()) + if (__unw_step(&cursor) <= 0) { + _LIBUNWIND_TRACE_UNWINDING(" _backtrace: ended because cursor reached " + "bottom of stack, returning %d", + _URC_END_OF_STACK); + return _URC_END_OF_STACK; + } +#else + // Get the information for this frame. + unw_proc_info_t frameInfo; + if (__unw_get_proc_info(&cursor, &frameInfo) != UNW_ESUCCESS) { + return _URC_END_OF_STACK; + } + + // Update the pr_cache in the mock exception object. + const uint32_t* unwindInfo = (uint32_t *) frameInfo.unwind_info; + ex.pr_cache.fnstart = frameInfo.start_ip; + ex.pr_cache.ehtp = (_Unwind_EHT_Header *) unwindInfo; + ex.pr_cache.additional= frameInfo.flags; + + struct _Unwind_Context *context = (struct _Unwind_Context *)&cursor; + // Get and call the personality function to unwind the frame. 
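    // Editorial note: on EHABI the unwind tables are interpreted by each frame's
    // own personality routine, so stepping during a backtrace means invoking it
    // with _US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND against the mock exception
    // object built above, rather than calling __unw_step().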
+ __personality_routine handler = (__personality_routine) frameInfo.handler; + if (handler == NULL) { + return _URC_END_OF_STACK; + } + if (handler(_US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND, &ex, context) != + _URC_CONTINUE_UNWIND) { + return _URC_END_OF_STACK; + } +#endif // defined(_LIBUNWIND_ARM_EHABI) + + // debugging + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionName[512]; + unw_proc_info_t frame; + unw_word_t offset; + __unw_get_proc_name(&cursor, functionName, 512, &offset); + __unw_get_proc_info(&cursor, &frame); + _LIBUNWIND_TRACE_UNWINDING( + " _backtrace: start_ip=0x%" PRIxPTR ", func=%s, lsda=0x%" PRIxPTR ", context=%p", + frame.start_ip, functionName, frame.lsda, + (void *)&cursor); + } + + // call trace function with this frame + result = (*callback)((struct _Unwind_Context *)(&cursor), ref); + if (result != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING( + " _backtrace: ended because callback returned %d", result); + return result; + } + } +} + + +/// Find DWARF unwind info for an address 'pc' in some function. +_LIBUNWIND_EXPORT const void *_Unwind_Find_FDE(const void *pc, + struct dwarf_eh_bases *bases) { + // This is slow, but works. + // We create an unwind cursor then alter the IP to be pc + unw_cursor_t cursor; + unw_context_t uc; + unw_proc_info_t info; + __unw_getcontext(&uc); + __unw_init_local(&cursor, &uc); + __unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t)pc); + __unw_get_proc_info(&cursor, &info); + bases->tbase = (uintptr_t)info.extra; + bases->dbase = 0; // dbase not used on Mac OS X + bases->func = (uintptr_t)info.start_ip; + _LIBUNWIND_TRACE_API("_Unwind_Find_FDE(pc=%p) => %p", pc, + (void *)(intptr_t) info.unwind_info); + return (void *)(intptr_t) info.unwind_info; +} + +/// Returns the CFA (call frame area, or stack pointer at start of function) +/// for the current context. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_word_t result; + __unw_get_reg(cursor, UNW_REG_SP, &result); + _LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + return (uintptr_t)result; +} + + +/// Called by personality handler during phase 2 to get instruction pointer. +/// ipBefore is a boolean that says if IP is already adjusted to be the call +/// site address. Normally IP is the return address. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, + int *ipBefore) { + _LIBUNWIND_TRACE_API("_Unwind_GetIPInfo(context=%p)", (void *)context); + *ipBefore = 0; + return _Unwind_GetIP(context); +} + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +/// Called by programs with dynamic code generators that want +/// to register a dynamically generated FDE. +/// This function has existed on Mac OS X since 10.4, but +/// was broken until 10.6. +_LIBUNWIND_EXPORT void __register_frame(const void *fde) { + _LIBUNWIND_TRACE_API("__register_frame(%p)", fde); + __unw_add_dynamic_fde((unw_word_t)(uintptr_t)fde); +} + + +/// Called by programs with dynamic code generators that want +/// to unregister a dynamically generated FDE. +/// This function has existed on Mac OS X since 10.4, but +/// was broken until 10.6. +_LIBUNWIND_EXPORT void __deregister_frame(const void *fde) { + _LIBUNWIND_TRACE_API("__deregister_frame(%p)", fde); + __unw_remove_dynamic_fde((unw_word_t)(uintptr_t)fde); +} + + +// The following register/deregister functions are gcc extensions. 
+// They have existed on Mac OS X, but have never worked because Mac OS X +// before 10.6 used keymgr to track known FDEs, but these functions +// never got updated to use keymgr. +// For now, we implement these as do-nothing functions to keep any existing +// applications working. We also add the not in 10.6 symbol so that nwe +// application won't be able to use them. + +#if defined(_LIBUNWIND_SUPPORT_FRAME_APIS) +_LIBUNWIND_EXPORT void __register_frame_info_bases(const void *fde, void *ob, + void *tb, void *db) { + (void)fde; + (void)ob; + (void)tb; + (void)db; + _LIBUNWIND_TRACE_API("__register_frame_info_bases(%p,%p, %p, %p)", + fde, ob, tb, db); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_info(const void *fde, void *ob) { + (void)fde; + (void)ob; + _LIBUNWIND_TRACE_API("__register_frame_info(%p, %p)", fde, ob); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_info_table_bases(const void *fde, + void *ob, void *tb, + void *db) { + (void)fde; + (void)ob; + (void)tb; + (void)db; + _LIBUNWIND_TRACE_API("__register_frame_info_table_bases" + "(%p,%p, %p, %p)", fde, ob, tb, db); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_info_table(const void *fde, void *ob) { + (void)fde; + (void)ob; + _LIBUNWIND_TRACE_API("__register_frame_info_table(%p, %p)", fde, ob); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void __register_frame_table(const void *fde) { + (void)fde; + _LIBUNWIND_TRACE_API("__register_frame_table(%p)", fde); + // do nothing, this function never worked in Mac OS X +} + +_LIBUNWIND_EXPORT void *__deregister_frame_info(const void *fde) { + (void)fde; + _LIBUNWIND_TRACE_API("__deregister_frame_info(%p)", fde); + // do nothing, this function never worked in Mac OS X + return NULL; +} + +_LIBUNWIND_EXPORT void *__deregister_frame_info_bases(const void *fde) { + (void)fde; + _LIBUNWIND_TRACE_API("__deregister_frame_info_bases(%p)", fde); + // do nothing, this function never worked in Mac OS X + return NULL; +} +#endif // defined(_LIBUNWIND_SUPPORT_FRAME_APIS) + +#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +#endif // defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) diff --git a/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1.c b/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1.c new file mode 100644 index 0000000000000..bcb1a7fbec2a1 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/UnwindLevel1.c @@ -0,0 +1,515 @@ +//===------------------------- UnwindLevel1.c -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Implements C++ ABI Exception Handling Level 1 as documented at: +// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html +// using libunwind +// +//===----------------------------------------------------------------------===// + +// ARM EHABI does not specify _Unwind_{Get,Set}{GR,IP}(). Thus, we are +// defining inline functions to delegate the function calls to +// _Unwind_VRS_{Get,Set}(). However, some applications might declare the +// function protetype directly (instead of including ), thus we need +// to export these functions from libunwind.so as well. 
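// Editorial note: the core of this file is the Itanium two-phase model.
// unwind_phase1() walks the stack read-only until some personality routine
// answers _URC_HANDLER_FOUND, recording that frame's SP in private_2;
// unwind_phase2() then walks again, running landing pads, and uses the saved SP
// to tell the personality when the handler frame has been reached
// (_UA_HANDLER_FRAME).  unwind_phase2_forced() is the single-pass variant driven
// by a caller-supplied stop function.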
+#define _LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE 1 + +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "libunwind.h" +#include "libunwind_ext.h" +#include "unwind.h" + +#if !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__) + +#ifndef _LIBUNWIND_SUPPORT_SEH_UNWIND + +static _Unwind_Reason_Code +unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { + __unw_init_local(cursor, uc); + + // Walk each frame looking for a place to stop. + bool handlerNotFound = true; + while (handlerNotFound) { + // Ask libunwind to get next frame (skip over first which is + // _Unwind_RaiseException). + int stepResult = __unw_step(cursor); + if (stepResult == 0) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_END_OF_STACK; + } else if (stepResult < 0) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_step failed => " + "_URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE1_ERROR; + } + + // See if frame has code to run (has personality routine). + unw_proc_info_t frameInfo; + unw_word_t sp; + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE1_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + unw_word_t pc; + __unw_get_reg(cursor, UNW_REG_IP, &pc); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR + ", func=%s, lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "", + (void *)exception_object, pc, frameInfo.start_ip, functionName, + frameInfo.lsda, frameInfo.handler); + } + + // If there is a personality routine, ask it if it will want to stop at + // this frame. 
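    // Editorial note: the personality is invoked with the usual prototype, roughly
    //   _Unwind_Reason_Code p(int version, _Unwind_Action actions,
    //                         uint64_t exceptionClass,
    //                         _Unwind_Exception *ex,
    //                         struct _Unwind_Context *ctx);
    // and in this phase 'actions' is just _UA_SEARCH_PHASE, so a C++ personality
    // only consults its LSDA and never runs cleanups here.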
+ if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(uintptr_t)(frameInfo.handler); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): calling personality function %p", + (void *)exception_object, (void *)(uintptr_t)p); + _Unwind_Reason_Code personalityResult = + (*p)(1, _UA_SEARCH_PHASE, exception_object->exception_class, + exception_object, (struct _Unwind_Context *)(cursor)); + switch (personalityResult) { + case _URC_HANDLER_FOUND: + // found a catch clause or locals that need destructing in this frame + // stop search and remember stack pointer at the frame + handlerNotFound = false; + __unw_get_reg(cursor, UNW_REG_SP, &sp); + exception_object->private_2 = (uintptr_t)sp; + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND", + (void *)exception_object); + return _URC_NO_REASON; + + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND", + (void *)exception_object); + // continue unwinding + break; + + default: + // something went wrong + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE1_ERROR; + } + } + } + return _URC_NO_REASON; +} + + +static _Unwind_Reason_Code +unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { + __unw_init_local(cursor, uc); + + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", + (void *)exception_object); + + // Walk each frame until we reach where search phase said to stop. + while (true) { + + // Ask libunwind to get next frame (skip over first which is + // _Unwind_RaiseException). + int stepResult = __unw_step(cursor); + if (stepResult == 0) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_END_OF_STACK; + } else if (stepResult < 0) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step failed => " + "_URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // Get info about this frame. + unw_word_t sp; + unw_proc_info_t frameInfo; + __unw_get_reg(cursor, UNW_REG_SP, &sp); + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE1_ERROR", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR + ", func=%s, sp=0x%" PRIxPTR ", lsda=0x%" PRIxPTR + ", personality=0x%" PRIxPTR, + (void *)exception_object, frameInfo.start_ip, + functionName, sp, frameInfo.lsda, + frameInfo.handler); + } + + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(uintptr_t)(frameInfo.handler); + _Unwind_Action action = _UA_CLEANUP_PHASE; + if (sp == exception_object->private_2) { + // Tell personality this was the frame it marked in phase 1. 
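        // Editorial note: private_2 holds the SP captured when phase 1 returned
        // _URC_HANDLER_FOUND; matching on SP is how phase 2 recognizes that it
        // has reached exactly that frame and must add _UA_HANDLER_FRAME to the
        // actions.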
+ action = (_Unwind_Action)(_UA_CLEANUP_PHASE | _UA_HANDLER_FRAME); + } + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(cursor)); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + // Continue unwinding + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND", + (void *)exception_object); + if (sp == exception_object->private_2) { + // Phase 1 said we would stop at this frame, but we did not... + _LIBUNWIND_ABORT("during phase1 personality function said it would " + "stop here, but now in phase2 it did not stop here"); + } + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT", + (void *)exception_object); + // Personality routine says to transfer control to landing pad. + // We may get control back if landing pad calls _Unwind_Resume(). + if (_LIBUNWIND_TRACING_UNWINDING) { + unw_word_t pc; + __unw_get_reg(cursor, UNW_REG_IP, &pc); + __unw_get_reg(cursor, UNW_REG_SP, &sp); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " + "user code with ip=0x%" PRIxPTR + ", sp=0x%" PRIxPTR, + (void *)exception_object, pc, sp); + } + __unw_resume(cursor); + // __unw_resume() only returns if there was an error. + return _URC_FATAL_PHASE2_ERROR; + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", + personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + } + + // Clean up phase did not resume at the frame that the search phase + // said it would... + return _URC_FATAL_PHASE2_ERROR; +} + +static _Unwind_Reason_Code +unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, + _Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + __unw_init_local(cursor, uc); + + // Walk each frame until we reach where search phase said to stop + while (__unw_step(cursor) > 0) { + + // Update info about this frame. + unw_proc_info_t frameInfo; + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): __unw_step " + "failed => _URC_END_OF_STACK", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // When tracing, print state information. + if (_LIBUNWIND_TRACING_UNWINDING) { + char functionBuf[512]; + const char *functionName = functionBuf; + unw_word_t offset; + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || + (frameInfo.start_ip + offset > frameInfo.end_ip)) + functionName = ".anonymous."; + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): start_ip=0x%" PRIxPTR + ", func=%s, lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR, + (void *)exception_object, frameInfo.start_ip, functionName, + frameInfo.lsda, frameInfo.handler); + } + + // Call stop function at each frame. 
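    // Editorial note: a stop function has the shape
    //   _Unwind_Reason_Code stop(int version, _Unwind_Action actions,
    //                            uint64_t exceptionClass,
    //                            _Unwind_Exception *ex,
    //                            struct _Unwind_Context *ctx, void *param);
    // returning _URC_NO_REASON means "keep unwinding"; any other value ends the
    // walk right here.  The final call after the loop additionally ORs
    // _UA_END_OF_STACK into 'actions'.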
+ _Unwind_Action action = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE); + _Unwind_Reason_Code stopResult = + (*stop)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(cursor), stop_parameter); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): stop function returned %d", + (void *)exception_object, stopResult); + if (stopResult != _URC_NO_REASON) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): stopped by stop function", + (void *)exception_object); + return _URC_FATAL_PHASE2_ERROR; + } + + // If there is a personality routine, tell it we are unwinding. + if (frameInfo.handler != 0) { + __personality_routine p = + (__personality_routine)(intptr_t)(frameInfo.handler); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2_forced(ex_ojb=%p): calling personality function %p", + (void *)exception_object, (void *)(uintptr_t)p); + _Unwind_Reason_Code personalityResult = + (*p)(1, action, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(cursor)); + switch (personalityResult) { + case _URC_CONTINUE_UNWIND: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " + "_URC_CONTINUE_UNWIND", + (void *)exception_object); + // Destructors called, continue unwinding + break; + case _URC_INSTALL_CONTEXT: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " + "_URC_INSTALL_CONTEXT", + (void *)exception_object); + // We may get control back if landing pad calls _Unwind_Resume(). + __unw_resume(cursor); + break; + default: + // Personality routine returned an unknown result code. + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned %d, " + "_URC_FATAL_PHASE2_ERROR", + (void *)exception_object, personalityResult); + return _URC_FATAL_PHASE2_ERROR; + } + } + } + + // Call stop function one last time and tell it we've reached the end + // of the stack. + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " + "function with _UA_END_OF_STACK", + (void *)exception_object); + _Unwind_Action lastAction = + (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); + (*stop)(1, lastAction, exception_object->exception_class, exception_object, + (struct _Unwind_Context *)(cursor), stop_parameter); + + // Clean up phase did not resume at the frame that the search phase said it + // would. + return _URC_FATAL_PHASE2_ERROR; +} + + +/// Called by __cxa_throw. Only returns if there is a fatal error. +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_RaiseException(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)", + (void *)exception_object); + unw_context_t uc; + unw_cursor_t cursor; + __unw_getcontext(&uc); + + // Mark that this is a non-forced unwind, so _Unwind_Resume() + // can do the right thing. + exception_object->private_1 = 0; + exception_object->private_2 = 0; + + // phase 1: the search phase + _Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object); + if (phase1 != _URC_NO_REASON) + return phase1; + + // phase 2: the clean up phase + return unwind_phase2(&uc, &cursor, exception_object); +} + + + +/// When _Unwind_RaiseException() is in phase2, it hands control +/// to the personality function at each frame. The personality +/// may force a jump to a landing pad in that function, the landing +/// pad code may then call _Unwind_Resume() to continue with the +/// unwinding. 
Note: the call to _Unwind_Resume() is from compiler +/// geneated user code. All other _Unwind_* routines are called +/// by the C++ runtime __cxa_* routines. +/// +/// Note: re-throwing an exception (as opposed to continuing the unwind) +/// is implemented by having the code call __cxa_rethrow() which +/// in turn calls _Unwind_Resume_or_Rethrow(). +_LIBUNWIND_EXPORT void +_Unwind_Resume(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)", (void *)exception_object); + unw_context_t uc; + unw_cursor_t cursor; + __unw_getcontext(&uc); + + if (exception_object->private_1 != 0) + unwind_phase2_forced(&uc, &cursor, exception_object, + (_Unwind_Stop_Fn) exception_object->private_1, + (void *)exception_object->private_2); + else + unwind_phase2(&uc, &cursor, exception_object); + + // Clients assume _Unwind_Resume() does not return, so all we can do is abort. + _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); +} + + + +/// Not used by C++. +/// Unwinds stack, calling "stop" function at each frame. +/// Could be used to implement longjmp(). +_LIBUNWIND_EXPORT _Unwind_Reason_Code +_Unwind_ForcedUnwind(_Unwind_Exception *exception_object, + _Unwind_Stop_Fn stop, void *stop_parameter) { + _LIBUNWIND_TRACE_API("_Unwind_ForcedUnwind(ex_obj=%p, stop=%p)", + (void *)exception_object, (void *)(uintptr_t)stop); + unw_context_t uc; + unw_cursor_t cursor; + __unw_getcontext(&uc); + + // Mark that this is a forced unwind, so _Unwind_Resume() can do + // the right thing. + exception_object->private_1 = (uintptr_t) stop; + exception_object->private_2 = (uintptr_t) stop_parameter; + + // do it + return unwind_phase2_forced(&uc, &cursor, exception_object, stop, stop_parameter); +} + + +/// Called by personality handler during phase 2 to get LSDA for current frame. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + result = (uintptr_t)frameInfo.lsda; + _LIBUNWIND_TRACE_API( + "_Unwind_GetLanguageSpecificData(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + if (result != 0) { + if (*((uint8_t *)result) != 0xFF) + _LIBUNWIND_DEBUG_LOG("lsda at 0x%" PRIxPTR " does not start with 0xFF", + result); + } + return result; +} + + +/// Called by personality handler during phase 2 to find the start of the +/// function. +_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetRegionStart(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_proc_info_t frameInfo; + uintptr_t result = 0; + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + result = (uintptr_t)frameInfo.start_ip; + _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + return result; +} + +#endif // !_LIBUNWIND_SUPPORT_SEH_UNWIND + +/// Called by personality handler during phase 2 if a foreign exception +// is caught. +_LIBUNWIND_EXPORT void +_Unwind_DeleteException(_Unwind_Exception *exception_object) { + _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)", + (void *)exception_object); + if (exception_object->exception_cleanup != NULL) + (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, + exception_object); +} + +/// Called by personality handler during phase 2 to get register values. 
+_LIBUNWIND_EXPORT uintptr_t +_Unwind_GetGR(struct _Unwind_Context *context, int index) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_word_t result; + __unw_get_reg(cursor, index, &result); + _LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d) => 0x%" PRIxPTR, + (void *)context, index, result); + return (uintptr_t)result; +} + +/// Called by personality handler during phase 2 to alter register values. +_LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, + uintptr_t value) { + _LIBUNWIND_TRACE_API("_Unwind_SetGR(context=%p, reg=%d, value=0x%0" PRIxPTR + ")", + (void *)context, index, value); + unw_cursor_t *cursor = (unw_cursor_t *)context; + __unw_set_reg(cursor, index, value); +} + +/// Called by personality handler during phase 2 to get instruction pointer. +_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { + unw_cursor_t *cursor = (unw_cursor_t *)context; + unw_word_t result; + __unw_get_reg(cursor, UNW_REG_IP, &result); + _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIxPTR, + (void *)context, result); + return (uintptr_t)result; +} + +/// Called by personality handler during phase 2 to alter instruction pointer, +/// such as setting where the landing pad is, so _Unwind_Resume() will +/// start executing in the landing pad. +_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context, + uintptr_t value) { + _LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%0" PRIxPTR ")", + (void *)context, value); + unw_cursor_t *cursor = (unw_cursor_t *)context; + __unw_set_reg(cursor, UNW_REG_IP, value); +} + +#endif // !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__) diff --git a/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersRestore.S b/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersRestore.S new file mode 100644 index 0000000000000..01113565e8e00 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersRestore.S @@ -0,0 +1,1032 @@ +//===-------------------- UnwindRegistersRestore.S ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + + .text + +#if !defined(__USING_SJLJ_EXCEPTIONS__) + +#if defined(__i386__) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_x866jumptoEv) +# +# void libunwind::Registers_x86::jumpto() +# +#if defined(_WIN32) +# On windows, the 'this' pointer is passed in ecx instead of on the stack + movl %ecx, %eax +#else +# On entry: +# + + +# +-----------------------+ +# + thread_state pointer + +# +-----------------------+ +# + return address + +# +-----------------------+ <-- SP +# + + + movl 4(%esp), %eax +#endif + # set up eax and ret on new stack location + movl 28(%eax), %edx # edx holds new stack pointer + subl $8,%edx + movl %edx, 28(%eax) + movl 0(%eax), %ebx + movl %ebx, 0(%edx) + movl 40(%eax), %ebx + movl %ebx, 4(%edx) + # we now have ret and eax pushed onto where new stack will be + # restore all registers + movl 4(%eax), %ebx + movl 8(%eax), %ecx + movl 12(%eax), %edx + movl 16(%eax), %edi + movl 20(%eax), %esi + movl 24(%eax), %ebp + movl 28(%eax), %esp + # skip ss + # skip eflags + pop %eax # eax was already pushed on new stack + ret # eip was already pushed on new stack + # skip cs + # skip ds + # skip es + # skip fs + # skip gs + +#elif defined(__x86_64__) + +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind16Registers_x86_646jumptoEv) +# +# void libunwind::Registers_x86_64::jumpto() +# +#if defined(_WIN64) +# On entry, thread_state pointer is in rcx; move it into rdi +# to share restore code below. Since this routine restores and +# overwrites all registers, we can use the same registers for +# pointers and temporaries as on unix even though win64 normally +# mustn't clobber some of them. 
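# Editorial note: the sequence below parks the new RDI and RIP just under the
# target RSP (subq $16 on the saved stack pointer), restores every other
# register straight from the Registers_x86_64 struct, switches RSP last, and
# finishes with pop %rdi / ret so the final two values are recovered without
# needing any scratch register.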
+ movq %rcx, %rdi +#else +# On entry, thread_state pointer is in rdi +#endif + + movq 56(%rdi), %rax # rax holds new stack pointer + subq $16, %rax + movq %rax, 56(%rdi) + movq 32(%rdi), %rbx # store new rdi on new stack + movq %rbx, 0(%rax) + movq 128(%rdi), %rbx # store new rip on new stack + movq %rbx, 8(%rax) + # restore all registers + movq 0(%rdi), %rax + movq 8(%rdi), %rbx + movq 16(%rdi), %rcx + movq 24(%rdi), %rdx + # restore rdi later + movq 40(%rdi), %rsi + movq 48(%rdi), %rbp + # restore rsp later + movq 64(%rdi), %r8 + movq 72(%rdi), %r9 + movq 80(%rdi), %r10 + movq 88(%rdi), %r11 + movq 96(%rdi), %r12 + movq 104(%rdi), %r13 + movq 112(%rdi), %r14 + movq 120(%rdi), %r15 + # skip rflags + # skip cs + # skip fs + # skip gs + +#if defined(_WIN64) + movdqu 176(%rdi),%xmm0 + movdqu 192(%rdi),%xmm1 + movdqu 208(%rdi),%xmm2 + movdqu 224(%rdi),%xmm3 + movdqu 240(%rdi),%xmm4 + movdqu 256(%rdi),%xmm5 + movdqu 272(%rdi),%xmm6 + movdqu 288(%rdi),%xmm7 + movdqu 304(%rdi),%xmm8 + movdqu 320(%rdi),%xmm9 + movdqu 336(%rdi),%xmm10 + movdqu 352(%rdi),%xmm11 + movdqu 368(%rdi),%xmm12 + movdqu 384(%rdi),%xmm13 + movdqu 400(%rdi),%xmm14 + movdqu 416(%rdi),%xmm15 +#endif + movq 56(%rdi), %rsp # cut back rsp to new location + pop %rdi # rdi was saved here earlier + ret # rip was saved here + + +#elif defined(__powerpc64__) + +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_ppc646jumptoEv) +// +// void libunwind::Registers_ppc64::jumpto() +// +// On entry: +// thread_state pointer is in r3 +// + +// load register (GPR) +#define PPC64_LR(n) \ + ld %r##n, (8 * (n + 2))(%r3) + + // restore integral registers + // skip r0 for now + // skip r1 for now + PPC64_LR(2) + // skip r3 for now + // skip r4 for now + // skip r5 for now + PPC64_LR(6) + PPC64_LR(7) + PPC64_LR(8) + PPC64_LR(9) + PPC64_LR(10) + PPC64_LR(11) + PPC64_LR(12) + PPC64_LR(13) + PPC64_LR(14) + PPC64_LR(15) + PPC64_LR(16) + PPC64_LR(17) + PPC64_LR(18) + PPC64_LR(19) + PPC64_LR(20) + PPC64_LR(21) + PPC64_LR(22) + PPC64_LR(23) + PPC64_LR(24) + PPC64_LR(25) + PPC64_LR(26) + PPC64_LR(27) + PPC64_LR(28) + PPC64_LR(29) + PPC64_LR(30) + PPC64_LR(31) + +#ifdef PPC64_HAS_VMX + + // restore VS registers + // (note that this also restores floating point registers and V registers, + // because part of VS is mapped to these registers) + + addi %r4, %r3, PPC64_OFFS_FP + +// load VS register +#define PPC64_LVS(n) \ + lxvd2x %vs##n, 0, %r4 ;\ + addi %r4, %r4, 16 + + // restore the first 32 VS regs (and also all floating point regs) + PPC64_LVS(0) + PPC64_LVS(1) + PPC64_LVS(2) + PPC64_LVS(3) + PPC64_LVS(4) + PPC64_LVS(5) + PPC64_LVS(6) + PPC64_LVS(7) + PPC64_LVS(8) + PPC64_LVS(9) + PPC64_LVS(10) + PPC64_LVS(11) + PPC64_LVS(12) + PPC64_LVS(13) + PPC64_LVS(14) + PPC64_LVS(15) + PPC64_LVS(16) + PPC64_LVS(17) + PPC64_LVS(18) + PPC64_LVS(19) + PPC64_LVS(20) + PPC64_LVS(21) + PPC64_LVS(22) + PPC64_LVS(23) + PPC64_LVS(24) + PPC64_LVS(25) + PPC64_LVS(26) + PPC64_LVS(27) + PPC64_LVS(28) + PPC64_LVS(29) + PPC64_LVS(30) + PPC64_LVS(31) + + // use VRSAVE to conditionally restore the remaining VS regs, + // that are where the V regs are mapped + + ld %r5, PPC64_OFFS_VRSAVE(%r3) // test VRsave + cmpwi %r5, 0 + beq Lnovec + +// conditionally load VS +#define PPC64_CLVS_BOTTOM(n) \ + beq Ldone##n ;\ + addi %r4, %r3, PPC64_OFFS_FP + n * 16 ;\ + lxvd2x %vs##n, 0, %r4 ;\ +Ldone##n: + +#define PPC64_CLVSl(n) \ + andis. %r0, %r5, (1<<(47-n)) ;\ +PPC64_CLVS_BOTTOM(n) + +#define PPC64_CLVSh(n) \ + andi. 
%r0, %r5, (1<<(63-n)) ;\ +PPC64_CLVS_BOTTOM(n) + + PPC64_CLVSl(32) + PPC64_CLVSl(33) + PPC64_CLVSl(34) + PPC64_CLVSl(35) + PPC64_CLVSl(36) + PPC64_CLVSl(37) + PPC64_CLVSl(38) + PPC64_CLVSl(39) + PPC64_CLVSl(40) + PPC64_CLVSl(41) + PPC64_CLVSl(42) + PPC64_CLVSl(43) + PPC64_CLVSl(44) + PPC64_CLVSl(45) + PPC64_CLVSl(46) + PPC64_CLVSl(47) + PPC64_CLVSh(48) + PPC64_CLVSh(49) + PPC64_CLVSh(50) + PPC64_CLVSh(51) + PPC64_CLVSh(52) + PPC64_CLVSh(53) + PPC64_CLVSh(54) + PPC64_CLVSh(55) + PPC64_CLVSh(56) + PPC64_CLVSh(57) + PPC64_CLVSh(58) + PPC64_CLVSh(59) + PPC64_CLVSh(60) + PPC64_CLVSh(61) + PPC64_CLVSh(62) + PPC64_CLVSh(63) + +#else + +// load FP register +#define PPC64_LF(n) \ + lfd %f##n, (PPC64_OFFS_FP + n * 16)(%r3) + + // restore float registers + PPC64_LF(0) + PPC64_LF(1) + PPC64_LF(2) + PPC64_LF(3) + PPC64_LF(4) + PPC64_LF(5) + PPC64_LF(6) + PPC64_LF(7) + PPC64_LF(8) + PPC64_LF(9) + PPC64_LF(10) + PPC64_LF(11) + PPC64_LF(12) + PPC64_LF(13) + PPC64_LF(14) + PPC64_LF(15) + PPC64_LF(16) + PPC64_LF(17) + PPC64_LF(18) + PPC64_LF(19) + PPC64_LF(20) + PPC64_LF(21) + PPC64_LF(22) + PPC64_LF(23) + PPC64_LF(24) + PPC64_LF(25) + PPC64_LF(26) + PPC64_LF(27) + PPC64_LF(28) + PPC64_LF(29) + PPC64_LF(30) + PPC64_LF(31) + + // restore vector registers if any are in use + ld %r5, PPC64_OFFS_VRSAVE(%r3) // test VRsave + cmpwi %r5, 0 + beq Lnovec + + subi %r4, %r1, 16 + // r4 is now a 16-byte aligned pointer into the red zone + // the _vectorScalarRegisters may not be 16-byte aligned + // so copy via red zone temp buffer + +#define PPC64_CLV_UNALIGNED_BOTTOM(n) \ + beq Ldone##n ;\ + ld %r0, (PPC64_OFFS_V + n * 16)(%r3) ;\ + std %r0, 0(%r4) ;\ + ld %r0, (PPC64_OFFS_V + n * 16 + 8)(%r3) ;\ + std %r0, 8(%r4) ;\ + lvx %v##n, 0, %r4 ;\ +Ldone ## n: + +#define PPC64_CLV_UNALIGNEDl(n) \ + andis. %r0, %r5, (1<<(15-n)) ;\ +PPC64_CLV_UNALIGNED_BOTTOM(n) + +#define PPC64_CLV_UNALIGNEDh(n) \ + andi. 
%r0, %r5, (1<<(31-n)) ;\ +PPC64_CLV_UNALIGNED_BOTTOM(n) + + PPC64_CLV_UNALIGNEDl(0) + PPC64_CLV_UNALIGNEDl(1) + PPC64_CLV_UNALIGNEDl(2) + PPC64_CLV_UNALIGNEDl(3) + PPC64_CLV_UNALIGNEDl(4) + PPC64_CLV_UNALIGNEDl(5) + PPC64_CLV_UNALIGNEDl(6) + PPC64_CLV_UNALIGNEDl(7) + PPC64_CLV_UNALIGNEDl(8) + PPC64_CLV_UNALIGNEDl(9) + PPC64_CLV_UNALIGNEDl(10) + PPC64_CLV_UNALIGNEDl(11) + PPC64_CLV_UNALIGNEDl(12) + PPC64_CLV_UNALIGNEDl(13) + PPC64_CLV_UNALIGNEDl(14) + PPC64_CLV_UNALIGNEDl(15) + PPC64_CLV_UNALIGNEDh(16) + PPC64_CLV_UNALIGNEDh(17) + PPC64_CLV_UNALIGNEDh(18) + PPC64_CLV_UNALIGNEDh(19) + PPC64_CLV_UNALIGNEDh(20) + PPC64_CLV_UNALIGNEDh(21) + PPC64_CLV_UNALIGNEDh(22) + PPC64_CLV_UNALIGNEDh(23) + PPC64_CLV_UNALIGNEDh(24) + PPC64_CLV_UNALIGNEDh(25) + PPC64_CLV_UNALIGNEDh(26) + PPC64_CLV_UNALIGNEDh(27) + PPC64_CLV_UNALIGNEDh(28) + PPC64_CLV_UNALIGNEDh(29) + PPC64_CLV_UNALIGNEDh(30) + PPC64_CLV_UNALIGNEDh(31) + +#endif + +Lnovec: + ld %r0, PPC64_OFFS_CR(%r3) + mtcr %r0 + ld %r0, PPC64_OFFS_SRR0(%r3) + mtctr %r0 + + PPC64_LR(0) + PPC64_LR(5) + PPC64_LR(4) + PPC64_LR(1) + PPC64_LR(3) + bctr + +#elif defined(__ppc__) + +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) +// +// void libunwind::Registers_ppc::jumpto() +// +// On entry: +// thread_state pointer is in r3 +// + + // restore integral registerrs + // skip r0 for now + // skip r1 for now + lwz %r2, 16(%r3) + // skip r3 for now + // skip r4 for now + // skip r5 for now + lwz %r6, 32(%r3) + lwz %r7, 36(%r3) + lwz %r8, 40(%r3) + lwz %r9, 44(%r3) + lwz %r10, 48(%r3) + lwz %r11, 52(%r3) + lwz %r12, 56(%r3) + lwz %r13, 60(%r3) + lwz %r14, 64(%r3) + lwz %r15, 68(%r3) + lwz %r16, 72(%r3) + lwz %r17, 76(%r3) + lwz %r18, 80(%r3) + lwz %r19, 84(%r3) + lwz %r20, 88(%r3) + lwz %r21, 92(%r3) + lwz %r22, 96(%r3) + lwz %r23,100(%r3) + lwz %r24,104(%r3) + lwz %r25,108(%r3) + lwz %r26,112(%r3) + lwz %r27,116(%r3) + lwz %r28,120(%r3) + lwz %r29,124(%r3) + lwz %r30,128(%r3) + lwz %r31,132(%r3) + + // restore float registers + lfd %f0, 160(%r3) + lfd %f1, 168(%r3) + lfd %f2, 176(%r3) + lfd %f3, 184(%r3) + lfd %f4, 192(%r3) + lfd %f5, 200(%r3) + lfd %f6, 208(%r3) + lfd %f7, 216(%r3) + lfd %f8, 224(%r3) + lfd %f9, 232(%r3) + lfd %f10,240(%r3) + lfd %f11,248(%r3) + lfd %f12,256(%r3) + lfd %f13,264(%r3) + lfd %f14,272(%r3) + lfd %f15,280(%r3) + lfd %f16,288(%r3) + lfd %f17,296(%r3) + lfd %f18,304(%r3) + lfd %f19,312(%r3) + lfd %f20,320(%r3) + lfd %f21,328(%r3) + lfd %f22,336(%r3) + lfd %f23,344(%r3) + lfd %f24,352(%r3) + lfd %f25,360(%r3) + lfd %f26,368(%r3) + lfd %f27,376(%r3) + lfd %f28,384(%r3) + lfd %f29,392(%r3) + lfd %f30,400(%r3) + lfd %f31,408(%r3) + + // restore vector registers if any are in use + lwz %r5, 156(%r3) // test VRsave + cmpwi %r5, 0 + beq Lnovec + + subi %r4, %r1, 16 + rlwinm %r4, %r4, 0, 0, 27 // mask low 4-bits + // r4 is now a 16-byte aligned pointer into the red zone + // the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer + + +#define LOAD_VECTOR_UNALIGNEDl(_index) \ + andis. %r0, %r5, (1<<(15-_index)) SEPARATOR \ + beq Ldone ## _index SEPARATOR \ + lwz %r0, 424+_index*16(%r3) SEPARATOR \ + stw %r0, 0(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+4(%r3) SEPARATOR \ + stw %r0, 4(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+8(%r3) SEPARATOR \ + stw %r0, 8(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+12(%r3) SEPARATOR \ + stw %r0, 12(%r4) SEPARATOR \ + lvx %v ## _index, 0, %r4 SEPARATOR \ + Ldone ## _index: + +#define LOAD_VECTOR_UNALIGNEDh(_index) \ + andi. 
%r0, %r5, (1<<(31-_index)) SEPARATOR \ + beq Ldone ## _index SEPARATOR \ + lwz %r0, 424+_index*16(%r3) SEPARATOR \ + stw %r0, 0(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+4(%r3) SEPARATOR \ + stw %r0, 4(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+8(%r3) SEPARATOR \ + stw %r0, 8(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+12(%r3) SEPARATOR \ + stw %r0, 12(%r4) SEPARATOR \ + lvx %v ## _index, 0, %r4 SEPARATOR \ + Ldone ## _index: + + + LOAD_VECTOR_UNALIGNEDl(0) + LOAD_VECTOR_UNALIGNEDl(1) + LOAD_VECTOR_UNALIGNEDl(2) + LOAD_VECTOR_UNALIGNEDl(3) + LOAD_VECTOR_UNALIGNEDl(4) + LOAD_VECTOR_UNALIGNEDl(5) + LOAD_VECTOR_UNALIGNEDl(6) + LOAD_VECTOR_UNALIGNEDl(7) + LOAD_VECTOR_UNALIGNEDl(8) + LOAD_VECTOR_UNALIGNEDl(9) + LOAD_VECTOR_UNALIGNEDl(10) + LOAD_VECTOR_UNALIGNEDl(11) + LOAD_VECTOR_UNALIGNEDl(12) + LOAD_VECTOR_UNALIGNEDl(13) + LOAD_VECTOR_UNALIGNEDl(14) + LOAD_VECTOR_UNALIGNEDl(15) + LOAD_VECTOR_UNALIGNEDh(16) + LOAD_VECTOR_UNALIGNEDh(17) + LOAD_VECTOR_UNALIGNEDh(18) + LOAD_VECTOR_UNALIGNEDh(19) + LOAD_VECTOR_UNALIGNEDh(20) + LOAD_VECTOR_UNALIGNEDh(21) + LOAD_VECTOR_UNALIGNEDh(22) + LOAD_VECTOR_UNALIGNEDh(23) + LOAD_VECTOR_UNALIGNEDh(24) + LOAD_VECTOR_UNALIGNEDh(25) + LOAD_VECTOR_UNALIGNEDh(26) + LOAD_VECTOR_UNALIGNEDh(27) + LOAD_VECTOR_UNALIGNEDh(28) + LOAD_VECTOR_UNALIGNEDh(29) + LOAD_VECTOR_UNALIGNEDh(30) + LOAD_VECTOR_UNALIGNEDh(31) + +Lnovec: + lwz %r0, 136(%r3) // __cr + mtcr %r0 + lwz %r0, 148(%r3) // __ctr + mtctr %r0 + lwz %r0, 0(%r3) // __ssr0 + mtctr %r0 + lwz %r0, 8(%r3) // do r0 now + lwz %r5, 28(%r3) // do r5 now + lwz %r4, 24(%r3) // do r4 now + lwz %r1, 12(%r3) // do sp now + lwz %r3, 20(%r3) // do r3 last + bctr + +#elif defined(__arm64__) || defined(__aarch64__) + +// +// void libunwind::Registers_arm64::jumpto() +// +// On entry: +// thread_state pointer is in x0 +// + .p2align 2 +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_arm646jumptoEv) + // skip restore of x0,x1 for now + ldp x2, x3, [x0, #0x010] + ldp x4, x5, [x0, #0x020] + ldp x6, x7, [x0, #0x030] + ldp x8, x9, [x0, #0x040] + ldp x10,x11, [x0, #0x050] + ldp x12,x13, [x0, #0x060] + ldp x14,x15, [x0, #0x070] + ldp x16,x17, [x0, #0x080] + ldp x18,x19, [x0, #0x090] + ldp x20,x21, [x0, #0x0A0] + ldp x22,x23, [x0, #0x0B0] + ldp x24,x25, [x0, #0x0C0] + ldp x26,x27, [x0, #0x0D0] + ldp x28,x29, [x0, #0x0E0] + ldr x30, [x0, #0x100] // restore pc into lr + ldr x1, [x0, #0x0F8] + mov sp,x1 // restore sp + + ldp d0, d1, [x0, #0x110] + ldp d2, d3, [x0, #0x120] + ldp d4, d5, [x0, #0x130] + ldp d6, d7, [x0, #0x140] + ldp d8, d9, [x0, #0x150] + ldp d10,d11, [x0, #0x160] + ldp d12,d13, [x0, #0x170] + ldp d14,d15, [x0, #0x180] + ldp d16,d17, [x0, #0x190] + ldp d18,d19, [x0, #0x1A0] + ldp d20,d21, [x0, #0x1B0] + ldp d22,d23, [x0, #0x1C0] + ldp d24,d25, [x0, #0x1D0] + ldp d26,d27, [x0, #0x1E0] + ldp d28,d29, [x0, #0x1F0] + ldr d30, [x0, #0x200] + ldr d31, [x0, #0x208] + + ldp x0, x1, [x0, #0x000] // restore x0,x1 + ret x30 // jump to pc + +#elif defined(__arm__) && !defined(__APPLE__) + +#if !defined(__ARM_ARCH_ISA_ARM) +#if (__ARM_ARCH_ISA_THUMB == 2) + .syntax unified +#endif + .thumb +#endif + +@ +@ void libunwind::Registers_arm::restoreCoreAndJumpTo() +@ +@ On entry: +@ thread_state pointer is in r0 +@ + .p2align 2 +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm20restoreCoreAndJumpToEv) +#if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1 + @ r8-r11: ldm into r1-r4, then mov to r8-r11 + adds r0, #0x20 + ldm r0!, {r1-r4} + subs r0, #0x30 + mov r8, r1 + mov r9, r2 + mov r10, r3 + mov r11, r4 + @ r12 does not need 
loading, it it the intra-procedure-call scratch register + ldr r2, [r0, #0x34] + ldr r3, [r0, #0x3c] + mov sp, r2 + mov lr, r3 @ restore pc into lr + ldm r0, {r0-r7} +#else + @ Use lr as base so that r0 can be restored. + mov lr, r0 + @ 32bit thumb-2 restrictions for ldm: + @ . the sp (r13) cannot be in the list + @ . the pc (r15) and lr (r14) cannot both be in the list in an LDM instruction + ldm lr, {r0-r12} + ldr sp, [lr, #52] + ldr lr, [lr, #60] @ restore pc into lr +#endif + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreVFPWithFLDMD(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3-d16 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMDEPv) + @ VFP and iwMMX instructions are only available when compiling with the flags + @ that enable them. We do not want to do that in the library (because we do not + @ want the compiler to generate instructions that access those) but this is + @ only accessed if the personality routine needs these registers. Use of + @ these registers implies they are, actually, available on the target, so + @ it's ok to execute. + @ So, generate the instruction using the corresponding coprocessor mnemonic. + vldmia r0, {d0-d15} + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreVFPWithFLDMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3-d16 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMXEPv) + vldmia r0, {d0-d15} @ fldmiax is deprecated in ARMv7+ and now behaves like vldmia + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreVFPv3(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm12restoreVFPv3EPv) + vldmia r0, {d16-d31} + JMP(lr) + +#if defined(__ARM_WMMX) + +@ +@ static void libunwind::Registers_arm::restoreiWMMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .arch armv5te +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm12restoreiWMMXEPv) + ldcl p1, cr0, [r0], #8 @ wldrd wR0, [r0], #8 + ldcl p1, cr1, [r0], #8 @ wldrd wR1, [r0], #8 + ldcl p1, cr2, [r0], #8 @ wldrd wR2, [r0], #8 + ldcl p1, cr3, [r0], #8 @ wldrd wR3, [r0], #8 + ldcl p1, cr4, [r0], #8 @ wldrd wR4, [r0], #8 + ldcl p1, cr5, [r0], #8 @ wldrd wR5, [r0], #8 + ldcl p1, cr6, [r0], #8 @ wldrd wR6, [r0], #8 + ldcl p1, cr7, [r0], #8 @ wldrd wR7, [r0], #8 + ldcl p1, cr8, [r0], #8 @ wldrd wR8, [r0], #8 + ldcl p1, cr9, [r0], #8 @ wldrd wR9, [r0], #8 + ldcl p1, cr10, [r0], #8 @ wldrd wR10, [r0], #8 + ldcl p1, cr11, [r0], #8 @ wldrd wR11, [r0], #8 + ldcl p1, cr12, [r0], #8 @ wldrd wR12, [r0], #8 + ldcl p1, cr13, [r0], #8 @ wldrd wR13, [r0], #8 + ldcl p1, cr14, [r0], #8 @ wldrd wR14, [r0], #8 + ldcl p1, cr15, [r0], #8 @ wldrd wR15, [r0], #8 + JMP(lr) + +@ +@ static void libunwind::Registers_arm::restoreiWMMXControl(unw_uint32_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .arch armv5te +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreiWMMXControlEPj) + ldc2 p1, cr8, [r0], #4 @ wldrw wCGR0, [r0], #4 + ldc2 p1, cr9, [r0], #4 @ wldrw wCGR1, [r0], #4 + ldc2 p1, cr10, [r0], #4 @ wldrw wCGR2, [r0], #4 + ldc2 p1, cr11, [r0], #4 @ wldrw wCGR3, [r0], #4 + JMP(lr) + +#endif + +#elif defined(__or1k__) + 
+DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind14Registers_or1k6jumptoEv) +# +# void libunwind::Registers_or1k::jumpto() +# +# On entry: +# thread_state pointer is in r3 +# + + # restore integral registers + l.lwz r0, 0(r3) + l.lwz r1, 4(r3) + l.lwz r2, 8(r3) + # skip r3 for now + l.lwz r4, 16(r3) + l.lwz r5, 20(r3) + l.lwz r6, 24(r3) + l.lwz r7, 28(r3) + l.lwz r8, 32(r3) + # skip r9 + l.lwz r10, 40(r3) + l.lwz r11, 44(r3) + l.lwz r12, 48(r3) + l.lwz r13, 52(r3) + l.lwz r14, 56(r3) + l.lwz r15, 60(r3) + l.lwz r16, 64(r3) + l.lwz r17, 68(r3) + l.lwz r18, 72(r3) + l.lwz r19, 76(r3) + l.lwz r20, 80(r3) + l.lwz r21, 84(r3) + l.lwz r22, 88(r3) + l.lwz r23, 92(r3) + l.lwz r24, 96(r3) + l.lwz r25,100(r3) + l.lwz r26,104(r3) + l.lwz r27,108(r3) + l.lwz r28,112(r3) + l.lwz r29,116(r3) + l.lwz r30,120(r3) + l.lwz r31,124(r3) + + # at last, restore r3 + l.lwz r3, 12(r3) + + # load new pc into ra + l.lwz r9, 128(r3) + # jump to pc + l.jr r9 + l.nop + +#elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32 + +// +// void libunwind::Registers_mips_o32::jumpto() +// +// On entry: +// thread state pointer is in a0 ($4) +// +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind18Registers_mips_o326jumptoEv) + .set push + .set noat + .set noreorder + .set nomacro +#ifdef __mips_hard_float +#if __mips_fpr != 64 + ldc1 $f0, (4 * 36 + 8 * 0)($4) + ldc1 $f2, (4 * 36 + 8 * 2)($4) + ldc1 $f4, (4 * 36 + 8 * 4)($4) + ldc1 $f6, (4 * 36 + 8 * 6)($4) + ldc1 $f8, (4 * 36 + 8 * 8)($4) + ldc1 $f10, (4 * 36 + 8 * 10)($4) + ldc1 $f12, (4 * 36 + 8 * 12)($4) + ldc1 $f14, (4 * 36 + 8 * 14)($4) + ldc1 $f16, (4 * 36 + 8 * 16)($4) + ldc1 $f18, (4 * 36 + 8 * 18)($4) + ldc1 $f20, (4 * 36 + 8 * 20)($4) + ldc1 $f22, (4 * 36 + 8 * 22)($4) + ldc1 $f24, (4 * 36 + 8 * 24)($4) + ldc1 $f26, (4 * 36 + 8 * 26)($4) + ldc1 $f28, (4 * 36 + 8 * 28)($4) + ldc1 $f30, (4 * 36 + 8 * 30)($4) +#else + ldc1 $f0, (4 * 36 + 8 * 0)($4) + ldc1 $f1, (4 * 36 + 8 * 1)($4) + ldc1 $f2, (4 * 36 + 8 * 2)($4) + ldc1 $f3, (4 * 36 + 8 * 3)($4) + ldc1 $f4, (4 * 36 + 8 * 4)($4) + ldc1 $f5, (4 * 36 + 8 * 5)($4) + ldc1 $f6, (4 * 36 + 8 * 6)($4) + ldc1 $f7, (4 * 36 + 8 * 7)($4) + ldc1 $f8, (4 * 36 + 8 * 8)($4) + ldc1 $f9, (4 * 36 + 8 * 9)($4) + ldc1 $f10, (4 * 36 + 8 * 10)($4) + ldc1 $f11, (4 * 36 + 8 * 11)($4) + ldc1 $f12, (4 * 36 + 8 * 12)($4) + ldc1 $f13, (4 * 36 + 8 * 13)($4) + ldc1 $f14, (4 * 36 + 8 * 14)($4) + ldc1 $f15, (4 * 36 + 8 * 15)($4) + ldc1 $f16, (4 * 36 + 8 * 16)($4) + ldc1 $f17, (4 * 36 + 8 * 17)($4) + ldc1 $f18, (4 * 36 + 8 * 18)($4) + ldc1 $f19, (4 * 36 + 8 * 19)($4) + ldc1 $f20, (4 * 36 + 8 * 20)($4) + ldc1 $f21, (4 * 36 + 8 * 21)($4) + ldc1 $f22, (4 * 36 + 8 * 22)($4) + ldc1 $f23, (4 * 36 + 8 * 23)($4) + ldc1 $f24, (4 * 36 + 8 * 24)($4) + ldc1 $f25, (4 * 36 + 8 * 25)($4) + ldc1 $f26, (4 * 36 + 8 * 26)($4) + ldc1 $f27, (4 * 36 + 8 * 27)($4) + ldc1 $f28, (4 * 36 + 8 * 28)($4) + ldc1 $f29, (4 * 36 + 8 * 29)($4) + ldc1 $f30, (4 * 36 + 8 * 30)($4) + ldc1 $f31, (4 * 36 + 8 * 31)($4) +#endif +#endif + // restore hi and lo + lw $8, (4 * 33)($4) + mthi $8 + lw $8, (4 * 34)($4) + mtlo $8 + // r0 is zero + lw $1, (4 * 1)($4) + lw $2, (4 * 2)($4) + lw $3, (4 * 3)($4) + // skip a0 for now + lw $5, (4 * 5)($4) + lw $6, (4 * 6)($4) + lw $7, (4 * 7)($4) + lw $8, (4 * 8)($4) + lw $9, (4 * 9)($4) + lw $10, (4 * 10)($4) + lw $11, (4 * 11)($4) + lw $12, (4 * 12)($4) + lw $13, (4 * 13)($4) + lw $14, (4 * 14)($4) + lw $15, (4 * 15)($4) + lw $16, (4 * 16)($4) + lw $17, (4 * 17)($4) + lw $18, (4 * 18)($4) + lw $19, (4 * 19)($4) + lw $20, (4 * 20)($4) + lw $21, (4 * 21)($4) + lw 
$22, (4 * 22)($4) + lw $23, (4 * 23)($4) + lw $24, (4 * 24)($4) + lw $25, (4 * 25)($4) + lw $26, (4 * 26)($4) + lw $27, (4 * 27)($4) + lw $28, (4 * 28)($4) + lw $29, (4 * 29)($4) + lw $30, (4 * 30)($4) + // load new pc into ra + lw $31, (4 * 32)($4) + // jump to ra, load a0 in the delay slot + jr $31 + lw $4, (4 * 4)($4) + .set pop + +#elif defined(__mips64) + +// +// void libunwind::Registers_mips_newabi::jumpto() +// +// On entry: +// thread state pointer is in a0 ($4) +// +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind21Registers_mips_newabi6jumptoEv) + .set push + .set noat + .set noreorder + .set nomacro +#ifdef __mips_hard_float + ldc1 $f0, (8 * 35)($4) + ldc1 $f1, (8 * 36)($4) + ldc1 $f2, (8 * 37)($4) + ldc1 $f3, (8 * 38)($4) + ldc1 $f4, (8 * 39)($4) + ldc1 $f5, (8 * 40)($4) + ldc1 $f6, (8 * 41)($4) + ldc1 $f7, (8 * 42)($4) + ldc1 $f8, (8 * 43)($4) + ldc1 $f9, (8 * 44)($4) + ldc1 $f10, (8 * 45)($4) + ldc1 $f11, (8 * 46)($4) + ldc1 $f12, (8 * 47)($4) + ldc1 $f13, (8 * 48)($4) + ldc1 $f14, (8 * 49)($4) + ldc1 $f15, (8 * 50)($4) + ldc1 $f16, (8 * 51)($4) + ldc1 $f17, (8 * 52)($4) + ldc1 $f18, (8 * 53)($4) + ldc1 $f19, (8 * 54)($4) + ldc1 $f20, (8 * 55)($4) + ldc1 $f21, (8 * 56)($4) + ldc1 $f22, (8 * 57)($4) + ldc1 $f23, (8 * 58)($4) + ldc1 $f24, (8 * 59)($4) + ldc1 $f25, (8 * 60)($4) + ldc1 $f26, (8 * 61)($4) + ldc1 $f27, (8 * 62)($4) + ldc1 $f28, (8 * 63)($4) + ldc1 $f29, (8 * 64)($4) + ldc1 $f30, (8 * 65)($4) + ldc1 $f31, (8 * 66)($4) +#endif + // restore hi and lo + ld $8, (8 * 33)($4) + mthi $8 + ld $8, (8 * 34)($4) + mtlo $8 + // r0 is zero + ld $1, (8 * 1)($4) + ld $2, (8 * 2)($4) + ld $3, (8 * 3)($4) + // skip a0 for now + ld $5, (8 * 5)($4) + ld $6, (8 * 6)($4) + ld $7, (8 * 7)($4) + ld $8, (8 * 8)($4) + ld $9, (8 * 9)($4) + ld $10, (8 * 10)($4) + ld $11, (8 * 11)($4) + ld $12, (8 * 12)($4) + ld $13, (8 * 13)($4) + ld $14, (8 * 14)($4) + ld $15, (8 * 15)($4) + ld $16, (8 * 16)($4) + ld $17, (8 * 17)($4) + ld $18, (8 * 18)($4) + ld $19, (8 * 19)($4) + ld $20, (8 * 20)($4) + ld $21, (8 * 21)($4) + ld $22, (8 * 22)($4) + ld $23, (8 * 23)($4) + ld $24, (8 * 24)($4) + ld $25, (8 * 25)($4) + ld $26, (8 * 26)($4) + ld $27, (8 * 27)($4) + ld $28, (8 * 28)($4) + ld $29, (8 * 29)($4) + ld $30, (8 * 30)($4) + // load new pc into ra + ld $31, (8 * 32)($4) + // jump to ra, load a0 in the delay slot + jr $31 + ld $4, (8 * 4)($4) + .set pop + +#elif defined(__sparc__) + +// +// void libunwind::Registers_sparc_o32::jumpto() +// +// On entry: +// thread_state pointer is in o0 +// +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_sparc6jumptoEv) + ta 3 + ldd [%o0 + 64], %l0 + ldd [%o0 + 72], %l2 + ldd [%o0 + 80], %l4 + ldd [%o0 + 88], %l6 + ldd [%o0 + 96], %i0 + ldd [%o0 + 104], %i2 + ldd [%o0 + 112], %i4 + ldd [%o0 + 120], %i6 + ld [%o0 + 60], %o7 + jmp %o7 + nop + +#endif + +#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersSave.S b/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersSave.S new file mode 100644 index 0000000000000..54505e53bac70 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/UnwindRegistersSave.S @@ -0,0 +1,983 @@ +//===------------------------ UnwindRegistersSave.S -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + + .text + +#if !defined(__USING_SJLJ_EXCEPTIONS__) + +#if defined(__i386__) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# + + +# +-----------------------+ +# + thread_state pointer + +# +-----------------------+ +# + return address + +# +-----------------------+ <-- SP +# + + +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + push %eax + movl 8(%esp), %eax + movl %ebx, 4(%eax) + movl %ecx, 8(%eax) + movl %edx, 12(%eax) + movl %edi, 16(%eax) + movl %esi, 20(%eax) + movl %ebp, 24(%eax) + movl %esp, %edx + addl $8, %edx + movl %edx, 28(%eax) # store what sp was at call site as esp + # skip ss + # skip eflags + movl 4(%esp), %edx + movl %edx, 40(%eax) # store return address as eip + # skip cs + # skip ds + # skip es + # skip fs + # skip gs + movl (%esp), %edx + movl %edx, (%eax) # store original eax + popl %eax + xorl %eax, %eax # return UNW_ESUCCESS + ret + +#elif defined(__x86_64__) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in rdi +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) +#if defined(_WIN64) +#define PTR %rcx +#define TMP %rdx +#else +#define PTR %rdi +#define TMP %rsi +#endif + + movq %rax, (PTR) + movq %rbx, 8(PTR) + movq %rcx, 16(PTR) + movq %rdx, 24(PTR) + movq %rdi, 32(PTR) + movq %rsi, 40(PTR) + movq %rbp, 48(PTR) + movq %rsp, 56(PTR) + addq $8, 56(PTR) + movq %r8, 64(PTR) + movq %r9, 72(PTR) + movq %r10, 80(PTR) + movq %r11, 88(PTR) + movq %r12, 96(PTR) + movq %r13,104(PTR) + movq %r14,112(PTR) + movq %r15,120(PTR) + movq (%rsp),TMP + movq TMP,128(PTR) # store return address as rip + # skip rflags + # skip cs + # skip fs + # skip gs + +#if defined(_WIN64) + movdqu %xmm0,176(PTR) + movdqu %xmm1,192(PTR) + movdqu %xmm2,208(PTR) + movdqu %xmm3,224(PTR) + movdqu %xmm4,240(PTR) + movdqu %xmm5,256(PTR) + movdqu %xmm6,272(PTR) + movdqu %xmm7,288(PTR) + movdqu %xmm8,304(PTR) + movdqu %xmm9,320(PTR) + movdqu %xmm10,336(PTR) + movdqu %xmm11,352(PTR) + movdqu %xmm12,368(PTR) + movdqu %xmm13,384(PTR) + movdqu %xmm14,400(PTR) + movdqu %xmm15,416(PTR) +#endif + xorl %eax, %eax # return UNW_ESUCCESS + ret + +#elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32 + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in a0 ($4) +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + .set push + .set noat + .set noreorder + .set nomacro + sw $1, (4 * 1)($4) + sw $2, (4 * 2)($4) + sw $3, (4 * 3)($4) + sw $4, (4 * 4)($4) + sw $5, (4 * 5)($4) + sw $6, (4 * 6)($4) + sw $7, (4 * 7)($4) + sw $8, (4 * 8)($4) + sw $9, (4 * 9)($4) + sw $10, (4 * 10)($4) + sw $11, (4 * 11)($4) + sw $12, (4 * 12)($4) + sw $13, (4 * 13)($4) + sw $14, (4 * 14)($4) + sw $15, (4 * 15)($4) + sw $16, (4 * 16)($4) + sw $17, (4 * 17)($4) + sw $18, (4 * 18)($4) + sw $19, (4 * 19)($4) + sw $20, (4 * 20)($4) + sw $21, (4 * 21)($4) + sw $22, (4 * 22)($4) + sw $23, (4 * 23)($4) + sw $24, (4 * 24)($4) + sw $25, (4 * 25)($4) + sw $26, (4 * 26)($4) + sw $27, (4 * 27)($4) + sw $28, (4 * 28)($4) + sw $29, (4 * 29)($4) + sw $30, (4 * 30)($4) + sw $31, (4 * 31)($4) + # Store return address to pc + sw $31, (4 * 32)($4) + # hi and lo + mfhi $8 + sw $8, (4 * 33)($4) + mflo $8 + sw $8, (4 * 34)($4) +#ifdef __mips_hard_float +#if __mips_fpr != 64 + sdc1 $f0, (4 * 36 + 8 * 0)($4) + sdc1 $f2, (4 * 36 + 8 * 2)($4) + 
sdc1 $f4, (4 * 36 + 8 * 4)($4) + sdc1 $f6, (4 * 36 + 8 * 6)($4) + sdc1 $f8, (4 * 36 + 8 * 8)($4) + sdc1 $f10, (4 * 36 + 8 * 10)($4) + sdc1 $f12, (4 * 36 + 8 * 12)($4) + sdc1 $f14, (4 * 36 + 8 * 14)($4) + sdc1 $f16, (4 * 36 + 8 * 16)($4) + sdc1 $f18, (4 * 36 + 8 * 18)($4) + sdc1 $f20, (4 * 36 + 8 * 20)($4) + sdc1 $f22, (4 * 36 + 8 * 22)($4) + sdc1 $f24, (4 * 36 + 8 * 24)($4) + sdc1 $f26, (4 * 36 + 8 * 26)($4) + sdc1 $f28, (4 * 36 + 8 * 28)($4) + sdc1 $f30, (4 * 36 + 8 * 30)($4) +#else + sdc1 $f0, (4 * 36 + 8 * 0)($4) + sdc1 $f1, (4 * 36 + 8 * 1)($4) + sdc1 $f2, (4 * 36 + 8 * 2)($4) + sdc1 $f3, (4 * 36 + 8 * 3)($4) + sdc1 $f4, (4 * 36 + 8 * 4)($4) + sdc1 $f5, (4 * 36 + 8 * 5)($4) + sdc1 $f6, (4 * 36 + 8 * 6)($4) + sdc1 $f7, (4 * 36 + 8 * 7)($4) + sdc1 $f8, (4 * 36 + 8 * 8)($4) + sdc1 $f9, (4 * 36 + 8 * 9)($4) + sdc1 $f10, (4 * 36 + 8 * 10)($4) + sdc1 $f11, (4 * 36 + 8 * 11)($4) + sdc1 $f12, (4 * 36 + 8 * 12)($4) + sdc1 $f13, (4 * 36 + 8 * 13)($4) + sdc1 $f14, (4 * 36 + 8 * 14)($4) + sdc1 $f15, (4 * 36 + 8 * 15)($4) + sdc1 $f16, (4 * 36 + 8 * 16)($4) + sdc1 $f17, (4 * 36 + 8 * 17)($4) + sdc1 $f18, (4 * 36 + 8 * 18)($4) + sdc1 $f19, (4 * 36 + 8 * 19)($4) + sdc1 $f20, (4 * 36 + 8 * 20)($4) + sdc1 $f21, (4 * 36 + 8 * 21)($4) + sdc1 $f22, (4 * 36 + 8 * 22)($4) + sdc1 $f23, (4 * 36 + 8 * 23)($4) + sdc1 $f24, (4 * 36 + 8 * 24)($4) + sdc1 $f25, (4 * 36 + 8 * 25)($4) + sdc1 $f26, (4 * 36 + 8 * 26)($4) + sdc1 $f27, (4 * 36 + 8 * 27)($4) + sdc1 $f28, (4 * 36 + 8 * 28)($4) + sdc1 $f29, (4 * 36 + 8 * 29)($4) + sdc1 $f30, (4 * 36 + 8 * 30)($4) + sdc1 $f31, (4 * 36 + 8 * 31)($4) +#endif +#endif + jr $31 + # return UNW_ESUCCESS + or $2, $0, $0 + .set pop + +#elif defined(__mips64) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in a0 ($4) +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + .set push + .set noat + .set noreorder + .set nomacro + sd $1, (8 * 1)($4) + sd $2, (8 * 2)($4) + sd $3, (8 * 3)($4) + sd $4, (8 * 4)($4) + sd $5, (8 * 5)($4) + sd $6, (8 * 6)($4) + sd $7, (8 * 7)($4) + sd $8, (8 * 8)($4) + sd $9, (8 * 9)($4) + sd $10, (8 * 10)($4) + sd $11, (8 * 11)($4) + sd $12, (8 * 12)($4) + sd $13, (8 * 13)($4) + sd $14, (8 * 14)($4) + sd $15, (8 * 15)($4) + sd $16, (8 * 16)($4) + sd $17, (8 * 17)($4) + sd $18, (8 * 18)($4) + sd $19, (8 * 19)($4) + sd $20, (8 * 20)($4) + sd $21, (8 * 21)($4) + sd $22, (8 * 22)($4) + sd $23, (8 * 23)($4) + sd $24, (8 * 24)($4) + sd $25, (8 * 25)($4) + sd $26, (8 * 26)($4) + sd $27, (8 * 27)($4) + sd $28, (8 * 28)($4) + sd $29, (8 * 29)($4) + sd $30, (8 * 30)($4) + sd $31, (8 * 31)($4) + # Store return address to pc + sd $31, (8 * 32)($4) + # hi and lo + mfhi $8 + sd $8, (8 * 33)($4) + mflo $8 + sd $8, (8 * 34)($4) +#ifdef __mips_hard_float + sdc1 $f0, (8 * 35)($4) + sdc1 $f1, (8 * 36)($4) + sdc1 $f2, (8 * 37)($4) + sdc1 $f3, (8 * 38)($4) + sdc1 $f4, (8 * 39)($4) + sdc1 $f5, (8 * 40)($4) + sdc1 $f6, (8 * 41)($4) + sdc1 $f7, (8 * 42)($4) + sdc1 $f8, (8 * 43)($4) + sdc1 $f9, (8 * 44)($4) + sdc1 $f10, (8 * 45)($4) + sdc1 $f11, (8 * 46)($4) + sdc1 $f12, (8 * 47)($4) + sdc1 $f13, (8 * 48)($4) + sdc1 $f14, (8 * 49)($4) + sdc1 $f15, (8 * 50)($4) + sdc1 $f16, (8 * 51)($4) + sdc1 $f17, (8 * 52)($4) + sdc1 $f18, (8 * 53)($4) + sdc1 $f19, (8 * 54)($4) + sdc1 $f20, (8 * 55)($4) + sdc1 $f21, (8 * 56)($4) + sdc1 $f22, (8 * 57)($4) + sdc1 $f23, (8 * 58)($4) + sdc1 $f24, (8 * 59)($4) + sdc1 $f25, (8 * 60)($4) + sdc1 $f26, (8 * 61)($4) + sdc1 $f27, (8 * 62)($4) + sdc1 $f28, (8 * 63)($4) + sdc1 $f29, (8 * 64)($4) + sdc1 $f30, (8 
* 65)($4) + sdc1 $f31, (8 * 66)($4) +#endif + jr $31 + # return UNW_ESUCCESS + or $2, $0, $0 + .set pop + +# elif defined(__mips__) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# Just trap for the time being. +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + teq $0, $0 + +#elif defined(__powerpc64__) + +// +// extern int __unw_getcontext(unw_context_t* thread_state) +// +// On entry: +// thread_state pointer is in r3 +// +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + +// store register (GPR) +#define PPC64_STR(n) \ + std %r##n, (8 * (n + 2))(%r3) + + // save GPRs + PPC64_STR(0) + mflr %r0 + std %r0, PPC64_OFFS_SRR0(%r3) // store lr as ssr0 + PPC64_STR(1) + PPC64_STR(2) + PPC64_STR(3) + PPC64_STR(4) + PPC64_STR(5) + PPC64_STR(6) + PPC64_STR(7) + PPC64_STR(8) + PPC64_STR(9) + PPC64_STR(10) + PPC64_STR(11) + PPC64_STR(12) + PPC64_STR(13) + PPC64_STR(14) + PPC64_STR(15) + PPC64_STR(16) + PPC64_STR(17) + PPC64_STR(18) + PPC64_STR(19) + PPC64_STR(20) + PPC64_STR(21) + PPC64_STR(22) + PPC64_STR(23) + PPC64_STR(24) + PPC64_STR(25) + PPC64_STR(26) + PPC64_STR(27) + PPC64_STR(28) + PPC64_STR(29) + PPC64_STR(30) + PPC64_STR(31) + + mfcr %r0 + std %r0, PPC64_OFFS_CR(%r3) + mfxer %r0 + std %r0, PPC64_OFFS_XER(%r3) + mflr %r0 + std %r0, PPC64_OFFS_LR(%r3) + mfctr %r0 + std %r0, PPC64_OFFS_CTR(%r3) + mfvrsave %r0 + std %r0, PPC64_OFFS_VRSAVE(%r3) + +#ifdef PPC64_HAS_VMX + // save VS registers + // (note that this also saves floating point registers and V registers, + // because part of VS is mapped to these registers) + + addi %r4, %r3, PPC64_OFFS_FP + +// store VS register +#define PPC64_STVS(n) \ + stxvd2x %vs##n, 0, %r4 ;\ + addi %r4, %r4, 16 + + PPC64_STVS(0) + PPC64_STVS(1) + PPC64_STVS(2) + PPC64_STVS(3) + PPC64_STVS(4) + PPC64_STVS(5) + PPC64_STVS(6) + PPC64_STVS(7) + PPC64_STVS(8) + PPC64_STVS(9) + PPC64_STVS(10) + PPC64_STVS(11) + PPC64_STVS(12) + PPC64_STVS(13) + PPC64_STVS(14) + PPC64_STVS(15) + PPC64_STVS(16) + PPC64_STVS(17) + PPC64_STVS(18) + PPC64_STVS(19) + PPC64_STVS(20) + PPC64_STVS(21) + PPC64_STVS(22) + PPC64_STVS(23) + PPC64_STVS(24) + PPC64_STVS(25) + PPC64_STVS(26) + PPC64_STVS(27) + PPC64_STVS(28) + PPC64_STVS(29) + PPC64_STVS(30) + PPC64_STVS(31) + PPC64_STVS(32) + PPC64_STVS(33) + PPC64_STVS(34) + PPC64_STVS(35) + PPC64_STVS(36) + PPC64_STVS(37) + PPC64_STVS(38) + PPC64_STVS(39) + PPC64_STVS(40) + PPC64_STVS(41) + PPC64_STVS(42) + PPC64_STVS(43) + PPC64_STVS(44) + PPC64_STVS(45) + PPC64_STVS(46) + PPC64_STVS(47) + PPC64_STVS(48) + PPC64_STVS(49) + PPC64_STVS(50) + PPC64_STVS(51) + PPC64_STVS(52) + PPC64_STVS(53) + PPC64_STVS(54) + PPC64_STVS(55) + PPC64_STVS(56) + PPC64_STVS(57) + PPC64_STVS(58) + PPC64_STVS(59) + PPC64_STVS(60) + PPC64_STVS(61) + PPC64_STVS(62) + PPC64_STVS(63) + +#else + +// store FP register +#define PPC64_STF(n) \ + stfd %f##n, (PPC64_OFFS_FP + n * 16)(%r3) + + // save float registers + PPC64_STF(0) + PPC64_STF(1) + PPC64_STF(2) + PPC64_STF(3) + PPC64_STF(4) + PPC64_STF(5) + PPC64_STF(6) + PPC64_STF(7) + PPC64_STF(8) + PPC64_STF(9) + PPC64_STF(10) + PPC64_STF(11) + PPC64_STF(12) + PPC64_STF(13) + PPC64_STF(14) + PPC64_STF(15) + PPC64_STF(16) + PPC64_STF(17) + PPC64_STF(18) + PPC64_STF(19) + PPC64_STF(20) + PPC64_STF(21) + PPC64_STF(22) + PPC64_STF(23) + PPC64_STF(24) + PPC64_STF(25) + PPC64_STF(26) + PPC64_STF(27) + PPC64_STF(28) + PPC64_STF(29) + PPC64_STF(30) + PPC64_STF(31) + + // save vector registers + + // Use 16-bytes below the stack pointer as an + // aligned buffer to save each vector register. 
+ // Note that the stack pointer is always 16-byte aligned. + subi %r4, %r1, 16 + +#define PPC64_STV_UNALIGNED(n) \ + stvx %v##n, 0, %r4 ;\ + ld %r5, 0(%r4) ;\ + std %r5, (PPC64_OFFS_V + n * 16)(%r3) ;\ + ld %r5, 8(%r4) ;\ + std %r5, (PPC64_OFFS_V + n * 16 + 8)(%r3) + + PPC64_STV_UNALIGNED(0) + PPC64_STV_UNALIGNED(1) + PPC64_STV_UNALIGNED(2) + PPC64_STV_UNALIGNED(3) + PPC64_STV_UNALIGNED(4) + PPC64_STV_UNALIGNED(5) + PPC64_STV_UNALIGNED(6) + PPC64_STV_UNALIGNED(7) + PPC64_STV_UNALIGNED(8) + PPC64_STV_UNALIGNED(9) + PPC64_STV_UNALIGNED(10) + PPC64_STV_UNALIGNED(11) + PPC64_STV_UNALIGNED(12) + PPC64_STV_UNALIGNED(13) + PPC64_STV_UNALIGNED(14) + PPC64_STV_UNALIGNED(15) + PPC64_STV_UNALIGNED(16) + PPC64_STV_UNALIGNED(17) + PPC64_STV_UNALIGNED(18) + PPC64_STV_UNALIGNED(19) + PPC64_STV_UNALIGNED(20) + PPC64_STV_UNALIGNED(21) + PPC64_STV_UNALIGNED(22) + PPC64_STV_UNALIGNED(23) + PPC64_STV_UNALIGNED(24) + PPC64_STV_UNALIGNED(25) + PPC64_STV_UNALIGNED(26) + PPC64_STV_UNALIGNED(27) + PPC64_STV_UNALIGNED(28) + PPC64_STV_UNALIGNED(29) + PPC64_STV_UNALIGNED(30) + PPC64_STV_UNALIGNED(31) + +#endif + + li %r3, 0 // return UNW_ESUCCESS + blr + + +#elif defined(__ppc__) + +// +// extern int unw_getcontext(unw_context_t* thread_state) +// +// On entry: +// thread_state pointer is in r3 +// +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + stw %r0, 8(%r3) + mflr %r0 + stw %r0, 0(%r3) // store lr as ssr0 + stw %r1, 12(%r3) + stw %r2, 16(%r3) + stw %r3, 20(%r3) + stw %r4, 24(%r3) + stw %r5, 28(%r3) + stw %r6, 32(%r3) + stw %r7, 36(%r3) + stw %r8, 40(%r3) + stw %r9, 44(%r3) + stw %r10, 48(%r3) + stw %r11, 52(%r3) + stw %r12, 56(%r3) + stw %r13, 60(%r3) + stw %r14, 64(%r3) + stw %r15, 68(%r3) + stw %r16, 72(%r3) + stw %r17, 76(%r3) + stw %r18, 80(%r3) + stw %r19, 84(%r3) + stw %r20, 88(%r3) + stw %r21, 92(%r3) + stw %r22, 96(%r3) + stw %r23,100(%r3) + stw %r24,104(%r3) + stw %r25,108(%r3) + stw %r26,112(%r3) + stw %r27,116(%r3) + stw %r28,120(%r3) + stw %r29,124(%r3) + stw %r30,128(%r3) + stw %r31,132(%r3) + + // save VRSave register + mfspr %r0, 256 + stw %r0, 156(%r3) + // save CR registers + mfcr %r0 + stw %r0, 136(%r3) + // save CTR register + mfctr %r0 + stw %r0, 148(%r3) + + // save float registers + stfd %f0, 160(%r3) + stfd %f1, 168(%r3) + stfd %f2, 176(%r3) + stfd %f3, 184(%r3) + stfd %f4, 192(%r3) + stfd %f5, 200(%r3) + stfd %f6, 208(%r3) + stfd %f7, 216(%r3) + stfd %f8, 224(%r3) + stfd %f9, 232(%r3) + stfd %f10,240(%r3) + stfd %f11,248(%r3) + stfd %f12,256(%r3) + stfd %f13,264(%r3) + stfd %f14,272(%r3) + stfd %f15,280(%r3) + stfd %f16,288(%r3) + stfd %f17,296(%r3) + stfd %f18,304(%r3) + stfd %f19,312(%r3) + stfd %f20,320(%r3) + stfd %f21,328(%r3) + stfd %f22,336(%r3) + stfd %f23,344(%r3) + stfd %f24,352(%r3) + stfd %f25,360(%r3) + stfd %f26,368(%r3) + stfd %f27,376(%r3) + stfd %f28,384(%r3) + stfd %f29,392(%r3) + stfd %f30,400(%r3) + stfd %f31,408(%r3) + + + // save vector registers + + subi %r4, %r1, 16 + rlwinm %r4, %r4, 0, 0, 27 // mask low 4-bits + // r4 is now a 16-byte aligned pointer into the red zone + +#define SAVE_VECTOR_UNALIGNED(_vec, _offset) \ + stvx _vec, 0, %r4 SEPARATOR \ + lwz %r5, 0(%r4) SEPARATOR \ + stw %r5, _offset(%r3) SEPARATOR \ + lwz %r5, 4(%r4) SEPARATOR \ + stw %r5, _offset+4(%r3) SEPARATOR \ + lwz %r5, 8(%r4) SEPARATOR \ + stw %r5, _offset+8(%r3) SEPARATOR \ + lwz %r5, 12(%r4) SEPARATOR \ + stw %r5, _offset+12(%r3) + + SAVE_VECTOR_UNALIGNED( %v0, 424+0x000) + SAVE_VECTOR_UNALIGNED( %v1, 424+0x010) + SAVE_VECTOR_UNALIGNED( %v2, 424+0x020) + SAVE_VECTOR_UNALIGNED( %v3, 
424+0x030) + SAVE_VECTOR_UNALIGNED( %v4, 424+0x040) + SAVE_VECTOR_UNALIGNED( %v5, 424+0x050) + SAVE_VECTOR_UNALIGNED( %v6, 424+0x060) + SAVE_VECTOR_UNALIGNED( %v7, 424+0x070) + SAVE_VECTOR_UNALIGNED( %v8, 424+0x080) + SAVE_VECTOR_UNALIGNED( %v9, 424+0x090) + SAVE_VECTOR_UNALIGNED(%v10, 424+0x0A0) + SAVE_VECTOR_UNALIGNED(%v11, 424+0x0B0) + SAVE_VECTOR_UNALIGNED(%v12, 424+0x0C0) + SAVE_VECTOR_UNALIGNED(%v13, 424+0x0D0) + SAVE_VECTOR_UNALIGNED(%v14, 424+0x0E0) + SAVE_VECTOR_UNALIGNED(%v15, 424+0x0F0) + SAVE_VECTOR_UNALIGNED(%v16, 424+0x100) + SAVE_VECTOR_UNALIGNED(%v17, 424+0x110) + SAVE_VECTOR_UNALIGNED(%v18, 424+0x120) + SAVE_VECTOR_UNALIGNED(%v19, 424+0x130) + SAVE_VECTOR_UNALIGNED(%v20, 424+0x140) + SAVE_VECTOR_UNALIGNED(%v21, 424+0x150) + SAVE_VECTOR_UNALIGNED(%v22, 424+0x160) + SAVE_VECTOR_UNALIGNED(%v23, 424+0x170) + SAVE_VECTOR_UNALIGNED(%v24, 424+0x180) + SAVE_VECTOR_UNALIGNED(%v25, 424+0x190) + SAVE_VECTOR_UNALIGNED(%v26, 424+0x1A0) + SAVE_VECTOR_UNALIGNED(%v27, 424+0x1B0) + SAVE_VECTOR_UNALIGNED(%v28, 424+0x1C0) + SAVE_VECTOR_UNALIGNED(%v29, 424+0x1D0) + SAVE_VECTOR_UNALIGNED(%v30, 424+0x1E0) + SAVE_VECTOR_UNALIGNED(%v31, 424+0x1F0) + + li %r3, 0 // return UNW_ESUCCESS + blr + + +#elif defined(__arm64__) || defined(__aarch64__) + +// +// extern int __unw_getcontext(unw_context_t* thread_state) +// +// On entry: +// thread_state pointer is in x0 +// + .p2align 2 +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + stp x0, x1, [x0, #0x000] + stp x2, x3, [x0, #0x010] + stp x4, x5, [x0, #0x020] + stp x6, x7, [x0, #0x030] + stp x8, x9, [x0, #0x040] + stp x10,x11, [x0, #0x050] + stp x12,x13, [x0, #0x060] + stp x14,x15, [x0, #0x070] + stp x16,x17, [x0, #0x080] + stp x18,x19, [x0, #0x090] + stp x20,x21, [x0, #0x0A0] + stp x22,x23, [x0, #0x0B0] + stp x24,x25, [x0, #0x0C0] + stp x26,x27, [x0, #0x0D0] + stp x28,x29, [x0, #0x0E0] + str x30, [x0, #0x0F0] + mov x1,sp + str x1, [x0, #0x0F8] + str x30, [x0, #0x100] // store return address as pc + // skip cpsr + stp d0, d1, [x0, #0x110] + stp d2, d3, [x0, #0x120] + stp d4, d5, [x0, #0x130] + stp d6, d7, [x0, #0x140] + stp d8, d9, [x0, #0x150] + stp d10,d11, [x0, #0x160] + stp d12,d13, [x0, #0x170] + stp d14,d15, [x0, #0x180] + stp d16,d17, [x0, #0x190] + stp d18,d19, [x0, #0x1A0] + stp d20,d21, [x0, #0x1B0] + stp d22,d23, [x0, #0x1C0] + stp d24,d25, [x0, #0x1D0] + stp d26,d27, [x0, #0x1E0] + stp d28,d29, [x0, #0x1F0] + str d30, [x0, #0x200] + str d31, [x0, #0x208] + mov x0, #0 // return UNW_ESUCCESS + ret + +#elif defined(__arm__) && !defined(__APPLE__) + +#if !defined(__ARM_ARCH_ISA_ARM) +#if (__ARM_ARCH_ISA_THUMB == 2) + .syntax unified +#endif + .thumb +#endif + +@ +@ extern int __unw_getcontext(unw_context_t* thread_state) +@ +@ On entry: +@ thread_state pointer is in r0 +@ +@ Per EHABI #4.7 this only saves the core integer registers. +@ EHABI #7.4.5 notes that in general all VRS registers should be restored +@ however this is very hard to do for VFP registers because it is unknown +@ to the library how many registers are implemented by the architecture. +@ Instead, VFP registers are demand saved by logic external to __unw_getcontext. 
+@ + .p2align 2 +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) +#if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1 + stm r0!, {r0-r7} + mov r1, r8 + mov r2, r9 + mov r3, r10 + stm r0!, {r1-r3} + mov r1, r11 + mov r2, sp + mov r3, lr + str r1, [r0, #0] @ r11 + @ r12 does not need storing, it it the intra-procedure-call scratch register + str r2, [r0, #8] @ sp + str r3, [r0, #12] @ lr + str r3, [r0, #16] @ store return address as pc + @ T1 does not have a non-cpsr-clobbering register-zeroing instruction. + @ It is safe to use here though because we are about to return, and cpsr is + @ not expected to be preserved. + movs r0, #0 @ return UNW_ESUCCESS +#else + @ 32bit thumb-2 restrictions for stm: + @ . the sp (r13) cannot be in the list + @ . the pc (r15) cannot be in the list in an STM instruction + stm r0, {r0-r12} + str sp, [r0, #52] + str lr, [r0, #56] + str lr, [r0, #60] @ store return address as pc + mov r0, #0 @ return UNW_ESUCCESS +#endif + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveVFPWithFSTMD(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3-d16 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMDEPv) + vstmia r0, {d0-d15} + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveVFPWithFSTMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3-d16 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMXEPv) + vstmia r0, {d0-d15} @ fstmiax is deprecated in ARMv7+ and now behaves like vstmia + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveVFPv3(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .fpu vfpv3 +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveVFPv3EPv) + @ VFP and iwMMX instructions are only available when compiling with the flags + @ that enable them. We do not want to do that in the library (because we do not + @ want the compiler to generate instructions that access those) but this is + @ only accessed if the personality routine needs these registers. Use of + @ these registers implies they are, actually, available on the target, so + @ it's ok to execute. + @ So, generate the instructions using the corresponding coprocessor mnemonic. 
+ vstmia r0, {d16-d31} + JMP(lr) + +#if defined(_LIBUNWIND_ARM_WMMX) + +@ +@ static void libunwind::Registers_arm::saveiWMMX(unw_fpreg_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .arch armv5te +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveiWMMXEPv) + stcl p1, cr0, [r0], #8 @ wstrd wR0, [r0], #8 + stcl p1, cr1, [r0], #8 @ wstrd wR1, [r0], #8 + stcl p1, cr2, [r0], #8 @ wstrd wR2, [r0], #8 + stcl p1, cr3, [r0], #8 @ wstrd wR3, [r0], #8 + stcl p1, cr4, [r0], #8 @ wstrd wR4, [r0], #8 + stcl p1, cr5, [r0], #8 @ wstrd wR5, [r0], #8 + stcl p1, cr6, [r0], #8 @ wstrd wR6, [r0], #8 + stcl p1, cr7, [r0], #8 @ wstrd wR7, [r0], #8 + stcl p1, cr8, [r0], #8 @ wstrd wR8, [r0], #8 + stcl p1, cr9, [r0], #8 @ wstrd wR9, [r0], #8 + stcl p1, cr10, [r0], #8 @ wstrd wR10, [r0], #8 + stcl p1, cr11, [r0], #8 @ wstrd wR11, [r0], #8 + stcl p1, cr12, [r0], #8 @ wstrd wR12, [r0], #8 + stcl p1, cr13, [r0], #8 @ wstrd wR13, [r0], #8 + stcl p1, cr14, [r0], #8 @ wstrd wR14, [r0], #8 + stcl p1, cr15, [r0], #8 @ wstrd wR15, [r0], #8 + JMP(lr) + +@ +@ static void libunwind::Registers_arm::saveiWMMXControl(unw_uint32_t* values) +@ +@ On entry: +@ values pointer is in r0 +@ + .p2align 2 +#if defined(__ELF__) + .arch armv5te +#endif +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveiWMMXControlEPj) + stc2 p1, cr8, [r0], #4 @ wstrw wCGR0, [r0], #4 + stc2 p1, cr9, [r0], #4 @ wstrw wCGR1, [r0], #4 + stc2 p1, cr10, [r0], #4 @ wstrw wCGR2, [r0], #4 + stc2 p1, cr11, [r0], #4 @ wstrw wCGR3, [r0], #4 + JMP(lr) + +#endif + +#elif defined(__or1k__) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in r3 +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + l.sw 0(r3), r0 + l.sw 4(r3), r1 + l.sw 8(r3), r2 + l.sw 12(r3), r3 + l.sw 16(r3), r4 + l.sw 20(r3), r5 + l.sw 24(r3), r6 + l.sw 28(r3), r7 + l.sw 32(r3), r8 + l.sw 36(r3), r9 + l.sw 40(r3), r10 + l.sw 44(r3), r11 + l.sw 48(r3), r12 + l.sw 52(r3), r13 + l.sw 56(r3), r14 + l.sw 60(r3), r15 + l.sw 64(r3), r16 + l.sw 68(r3), r17 + l.sw 72(r3), r18 + l.sw 76(r3), r19 + l.sw 80(r3), r20 + l.sw 84(r3), r21 + l.sw 88(r3), r22 + l.sw 92(r3), r23 + l.sw 96(r3), r24 + l.sw 100(r3), r25 + l.sw 104(r3), r26 + l.sw 108(r3), r27 + l.sw 112(r3), r28 + l.sw 116(r3), r29 + l.sw 120(r3), r30 + l.sw 124(r3), r31 + # store ra to pc + l.sw 128(r3), r9 + # zero epcr + l.sw 132(r3), r0 + +#elif defined(__sparc__) + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in o0 +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + ta 3 + add %o7, 8, %o7 + std %g0, [%o0 + 0] + std %g2, [%o0 + 8] + std %g4, [%o0 + 16] + std %g6, [%o0 + 24] + std %o0, [%o0 + 32] + std %o2, [%o0 + 40] + std %o4, [%o0 + 48] + std %o6, [%o0 + 56] + std %l0, [%o0 + 64] + std %l2, [%o0 + 72] + std %l4, [%o0 + 80] + std %l6, [%o0 + 88] + std %i0, [%o0 + 96] + std %i2, [%o0 + 104] + std %i4, [%o0 + 112] + std %i6, [%o0 + 120] + jmp %o7 + clr %o0 // return UNW_ESUCCESS +#endif + + WEAK_ALIAS(__unw_getcontext, unw_getcontext) + +#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ + +NO_EXEC_STACK_DIRECTIVE diff --git a/src/coreclr/src/nativeaot/libunwind/src/Unwind_AppleExtras.cpp b/src/coreclr/src/nativeaot/libunwind/src/Unwind_AppleExtras.cpp new file mode 100644 index 0000000000000..248d99570e94a --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/Unwind_AppleExtras.cpp @@ -0,0 +1,183 @@ +//===--------------------- Unwind_AppleExtras.cpp 
-------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +//===----------------------------------------------------------------------===// + +#include "config.h" +#include "AddressSpace.hpp" +#include "DwarfParser.hpp" + + +// private keymgr stuff +#define KEYMGR_GCC3_DW2_OBJ_LIST 302 +extern "C" { + extern void _keymgr_set_and_unlock_processwide_ptr(int key, void *ptr); + extern void *_keymgr_get_and_lock_processwide_ptr(int key); +} + +// undocumented libgcc "struct object" +struct libgcc_object { + void *start; + void *unused1; + void *unused2; + void *fde; + unsigned long encoding; + void *fde_end; + libgcc_object *next; +}; + +// undocumented libgcc "struct km_object_info" referenced by +// KEYMGR_GCC3_DW2_OBJ_LIST +struct libgcc_object_info { + libgcc_object *seen_objects; + libgcc_object *unseen_objects; + unsigned spare[2]; +}; + + +// static linker symbols to prevent wrong two level namespace for _Unwind symbols +#if defined(__arm__) + #define NOT_HERE_BEFORE_5_0(sym) \ + extern const char sym##_tmp30 __asm("$ld$hide$os3.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp30 = 0; \ + extern const char sym##_tmp31 __asm("$ld$hide$os3.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp31 = 0; \ + extern const char sym##_tmp32 __asm("$ld$hide$os3.2$_" #sym );\ + __attribute__((visibility("default"))) const char sym##_tmp32 = 0; \ + extern const char sym##_tmp40 __asm("$ld$hide$os4.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp40 = 0; \ + extern const char sym##_tmp41 __asm("$ld$hide$os4.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp41 = 0; \ + extern const char sym##_tmp42 __asm("$ld$hide$os4.2$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp42 = 0; \ + extern const char sym##_tmp43 __asm("$ld$hide$os4.3$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp43 = 0; +#elif defined(__arm64__) + #define NOT_HERE_BEFORE_10_6(sym) + #define NEVER_HERE(sym) +#else + #define NOT_HERE_BEFORE_10_6(sym) \ + extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; + #define NEVER_HERE(sym) \ + extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; +#endif + + +#if defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) + +// +// symbols in libSystem.dylib in 10.6 and later, but are in libgcc_s.dylib in +// earlier versions +// +NOT_HERE_BEFORE_10_6(_Unwind_DeleteException) +NOT_HERE_BEFORE_10_6(_Unwind_Find_FDE) +NOT_HERE_BEFORE_10_6(_Unwind_ForcedUnwind) +NOT_HERE_BEFORE_10_6(_Unwind_GetGR) +NOT_HERE_BEFORE_10_6(_Unwind_GetIP) +NOT_HERE_BEFORE_10_6(_Unwind_GetLanguageSpecificData) +NOT_HERE_BEFORE_10_6(_Unwind_GetRegionStart) +NOT_HERE_BEFORE_10_6(_Unwind_RaiseException) +NOT_HERE_BEFORE_10_6(_Unwind_Resume) 
+NOT_HERE_BEFORE_10_6(_Unwind_SetGR) +NOT_HERE_BEFORE_10_6(_Unwind_SetIP) +NOT_HERE_BEFORE_10_6(_Unwind_Backtrace) +NOT_HERE_BEFORE_10_6(_Unwind_FindEnclosingFunction) +NOT_HERE_BEFORE_10_6(_Unwind_GetCFA) +NOT_HERE_BEFORE_10_6(_Unwind_GetDataRelBase) +NOT_HERE_BEFORE_10_6(_Unwind_GetTextRelBase) +NOT_HERE_BEFORE_10_6(_Unwind_Resume_or_Rethrow) +NOT_HERE_BEFORE_10_6(_Unwind_GetIPInfo) +NOT_HERE_BEFORE_10_6(__register_frame) +NOT_HERE_BEFORE_10_6(__deregister_frame) + +// +// symbols in libSystem.dylib for compatibility, but we don't want any new code +// using them +// +NEVER_HERE(__register_frame_info_bases) +NEVER_HERE(__register_frame_info) +NEVER_HERE(__register_frame_info_table_bases) +NEVER_HERE(__register_frame_info_table) +NEVER_HERE(__register_frame_table) +NEVER_HERE(__deregister_frame_info) +NEVER_HERE(__deregister_frame_info_bases) + +#endif // defined(_LIBUNWIND_BUILD_ZERO_COST_APIS) + + + + +#if defined(_LIBUNWIND_BUILD_SJLJ_APIS) +// +// symbols in libSystem.dylib in iOS 5.0 and later, but are in libgcc_s.dylib in +// earlier versions +// +NOT_HERE_BEFORE_5_0(_Unwind_GetLanguageSpecificData) +NOT_HERE_BEFORE_5_0(_Unwind_GetRegionStart) +NOT_HERE_BEFORE_5_0(_Unwind_GetIP) +NOT_HERE_BEFORE_5_0(_Unwind_SetGR) +NOT_HERE_BEFORE_5_0(_Unwind_SetIP) +NOT_HERE_BEFORE_5_0(_Unwind_DeleteException) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Register) +NOT_HERE_BEFORE_5_0(_Unwind_GetGR) +NOT_HERE_BEFORE_5_0(_Unwind_GetIPInfo) +NOT_HERE_BEFORE_5_0(_Unwind_GetCFA) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_RaiseException) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Resume_or_Rethrow) +NOT_HERE_BEFORE_5_0(_Unwind_SjLj_Unregister) + +#endif // defined(_LIBUNWIND_BUILD_SJLJ_APIS) + + +namespace libunwind { + +_LIBUNWIND_HIDDEN +bool checkKeyMgrRegisteredFDEs(uintptr_t pc, void *&fde) { +#if __MAC_OS_X_VERSION_MIN_REQUIRED + // lastly check for old style keymgr registration of dynamically generated + // FDEs acquire exclusive access to libgcc_object_info + libgcc_object_info *head = (libgcc_object_info *) + _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); + if (head != NULL) { + // look at each FDE in keymgr + for (libgcc_object *ob = head->unseen_objects; ob != NULL; ob = ob->next) { + CFI_Parser::FDE_Info fdeInfo; + CFI_Parser::CIE_Info cieInfo; + const char *msg = CFI_Parser::decodeFDE( + LocalAddressSpace::sThisAddressSpace, + (uintptr_t)ob->fde, &fdeInfo, &cieInfo); + if (msg == NULL) { + // Check if this FDE is for a function that includes the pc + if ((fdeInfo.pcStart <= pc) && (pc < fdeInfo.pcEnd)) { + fde = (void*)fdeInfo.pcStart; + _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, + head); + return true; + } + } + } + } + // release libgcc_object_info + _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, head); +#else + (void)pc; + (void)fde; +#endif + return false; +} + +} + diff --git a/src/coreclr/src/nativeaot/libunwind/src/assembly.h b/src/coreclr/src/nativeaot/libunwind/src/assembly.h new file mode 100644 index 0000000000000..7132b6c561b0d --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/assembly.h @@ -0,0 +1,158 @@ +/* ===-- assembly.h - libUnwind assembler support macros -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * ===----------------------------------------------------------------------=== + * + * This file defines macros for use in libUnwind assembler source. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef UNWIND_ASSEMBLY_H +#define UNWIND_ASSEMBLY_H + +#if defined(__powerpc64__) +#define SEPARATOR ; +#define PPC64_OFFS_SRR0 0 +#define PPC64_OFFS_CR 272 +#define PPC64_OFFS_XER 280 +#define PPC64_OFFS_LR 288 +#define PPC64_OFFS_CTR 296 +#define PPC64_OFFS_VRSAVE 304 +#define PPC64_OFFS_FP 312 +#define PPC64_OFFS_V 824 +#ifdef _ARCH_PWR8 +#define PPC64_HAS_VMX +#endif +#elif defined(__arm64__) +#define SEPARATOR %% +#else +#define SEPARATOR ; +#endif + +#if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1) +#define PPC64_OPD1 .section .opd,"aw",@progbits SEPARATOR +#define PPC64_OPD2 SEPARATOR \ + .p2align 3 SEPARATOR \ + .quad .Lfunc_begin0 SEPARATOR \ + .quad .TOC.@tocbase SEPARATOR \ + .quad 0 SEPARATOR \ + .text SEPARATOR \ +.Lfunc_begin0: +#else +#define PPC64_OPD1 +#define PPC64_OPD2 +#endif + +#define GLUE2(a, b) a ## b +#define GLUE(a, b) GLUE2(a, b) +#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) + +#if defined(__APPLE__) + +#define SYMBOL_IS_FUNC(name) +#define EXPORT_SYMBOL(name) +#define HIDDEN_SYMBOL(name) .private_extern name +#define WEAK_SYMBOL(name) .weak_reference name +#define WEAK_ALIAS(name, aliasname) \ + .globl SYMBOL_NAME(aliasname) SEPARATOR \ + WEAK_SYMBOL(aliasname) SEPARATOR \ + SYMBOL_NAME(aliasname) = SYMBOL_NAME(name) + +#define NO_EXEC_STACK_DIRECTIVE + +#elif defined(__ELF__) + +#if defined(__arm__) +#define SYMBOL_IS_FUNC(name) .type name,%function +#else +#define SYMBOL_IS_FUNC(name) .type name,@function +#endif +#define EXPORT_SYMBOL(name) +#define HIDDEN_SYMBOL(name) .hidden name +#define WEAK_SYMBOL(name) .weak name +#define WEAK_ALIAS(name, aliasname) \ + WEAK_SYMBOL(aliasname) SEPARATOR \ + SYMBOL_NAME(aliasname) = SYMBOL_NAME(name) + +#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + defined(__linux__) +#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits +#else +#define NO_EXEC_STACK_DIRECTIVE +#endif + +#elif defined(_WIN32) + +#define SYMBOL_IS_FUNC(name) \ + .def name SEPARATOR \ + .scl 2 SEPARATOR \ + .type 32 SEPARATOR \ + .endef +#define EXPORT_SYMBOL2(name) \ + .section .drectve,"yn" SEPARATOR \ + .ascii "-export:", #name, "\0" SEPARATOR \ + .text +#if defined(_LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS) +#define EXPORT_SYMBOL(name) +#else +#define EXPORT_SYMBOL(name) EXPORT_SYMBOL2(name) +#endif +#define HIDDEN_SYMBOL(name) + +#if defined(__MINGW32__) +#define WEAK_ALIAS(name, aliasname) \ + .globl SYMBOL_NAME(aliasname) SEPARATOR \ + EXPORT_SYMBOL(aliasname) SEPARATOR \ + SYMBOL_NAME(aliasname) = SYMBOL_NAME(name) +#else +#define WEAK_ALIAS3(name, aliasname) \ + .section .drectve,"yn" SEPARATOR \ + .ascii "-alternatename:", #aliasname, "=", #name, "\0" SEPARATOR \ + .text +#define WEAK_ALIAS2(name, aliasname) \ + WEAK_ALIAS3(name, aliasname) +#define WEAK_ALIAS(name, aliasname) \ + EXPORT_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \ + WEAK_ALIAS2(SYMBOL_NAME(name), SYMBOL_NAME(aliasname)) +#endif + +#define NO_EXEC_STACK_DIRECTIVE + +#elif defined(__sparc__) + +#else + +#error Unsupported target + +#endif + +#define DEFINE_LIBUNWIND_FUNCTION(name) \ + .globl SYMBOL_NAME(name) SEPARATOR \ + 
HIDDEN_SYMBOL(SYMBOL_NAME(name)) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + PPC64_OPD1 \ + SYMBOL_NAME(name): \ + PPC64_OPD2 + +#if defined(__arm__) +#if !defined(__ARM_ARCH) +#define __ARM_ARCH 4 +#endif + +#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 +#define ARM_HAS_BX +#endif + +#ifdef ARM_HAS_BX +#define JMP(r) bx r +#else +#define JMP(r) mov pc, r +#endif +#endif /* __arm__ */ + +#endif /* UNWIND_ASSEMBLY_H */ diff --git a/src/coreclr/src/nativeaot/libunwind/src/config.h b/src/coreclr/src/nativeaot/libunwind/src/config.h new file mode 100644 index 0000000000000..09bb261647ca5 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/config.h @@ -0,0 +1,211 @@ +//===----------------------------- config.h -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Defines macros used within libunwind project. +// +//===----------------------------------------------------------------------===// + + +#ifndef LIBUNWIND_CONFIG_H +#define LIBUNWIND_CONFIG_H + +#include +#include +#include +#include + +// Define static_assert() unless already defined by compiler. +#ifndef __has_feature + #define __has_feature(__x) 0 +#endif +#if !(__has_feature(cxx_static_assert)) && !defined(static_assert) + #define static_assert(__b, __m) \ + extern int compile_time_assert_failed[ ( __b ) ? 1 : -1 ] \ + __attribute__( ( unused ) ); +#endif + +// Platform specific configuration defines. +#ifdef __APPLE__ + #if defined(FOR_DYLD) + #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND + #else + #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND + #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 + #endif +#elif defined(_WIN32) + #ifdef __SEH__ + #define _LIBUNWIND_SUPPORT_SEH_UNWIND 1 + #else + #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 + #endif +#else + #if defined(__ARM_DWARF_EH__) || !defined(__arm__) + #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 + #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 + #endif +#endif + +#if defined(_LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS) + #define _LIBUNWIND_EXPORT + #define _LIBUNWIND_HIDDEN +#else + #if !defined(__ELF__) && !defined(__MACH__) + #define _LIBUNWIND_EXPORT __declspec(dllexport) + #define _LIBUNWIND_HIDDEN + #else + #define _LIBUNWIND_EXPORT __attribute__((visibility("default"))) + #define _LIBUNWIND_HIDDEN __attribute__((visibility("hidden"))) + #endif +#endif + +#define STR(a) #a +#define XSTR(a) STR(a) +#define SYMBOL_NAME(name) XSTR(__USER_LABEL_PREFIX__) #name + +#if defined(__APPLE__) +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + __asm__(".globl " SYMBOL_NAME(aliasname)); \ + __asm__(SYMBOL_NAME(aliasname) " = " SYMBOL_NAME(name)); \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \ + __attribute__((weak_import)); +#elif defined(__ELF__) +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \ + __attribute__((weak, alias(#name))); +#elif defined(_WIN32) +#if defined(__MINGW32__) +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \ + __attribute__((alias(#name))); +#else +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + __pragma(comment(linker, "/alternatename:" SYMBOL_NAME(aliasname) "=" \ + SYMBOL_NAME(name))) \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname; +#endif +#else +#error Unsupported target +#endif + +#if (defined(__APPLE__) && 
defined(__arm__)) || defined(__USING_SJLJ_EXCEPTIONS__) +#define _LIBUNWIND_BUILD_SJLJ_APIS +#endif + +#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || defined(__ppc64__) || defined(__powerpc64__) +#define _LIBUNWIND_SUPPORT_FRAME_APIS +#endif + +#if defined(__i386__) || defined(__x86_64__) || \ + defined(__ppc__) || defined(__ppc64__) || defined(__powerpc64__) || \ + (!defined(__APPLE__) && defined(__arm__)) || \ + (defined(__arm64__) || defined(__aarch64__)) || \ + defined(__mips__) +#if !defined(_LIBUNWIND_BUILD_SJLJ_APIS) +#define _LIBUNWIND_BUILD_ZERO_COST_APIS +#endif +#endif + +#if defined(__powerpc64__) && defined(_ARCH_PWR8) +#define PPC64_HAS_VMX +#endif + +#if defined(NDEBUG) && defined(_LIBUNWIND_IS_BAREMETAL) +#define _LIBUNWIND_ABORT(msg) \ + do { \ + abort(); \ + } while (0) +#else +#define _LIBUNWIND_ABORT(msg) \ + do { \ + fprintf(stderr, "libunwind: %s %s:%d - %s\n", __func__, __FILE__, \ + __LINE__, msg); \ + fflush(stderr); \ + abort(); \ + } while (0) +#endif + +#if defined(NDEBUG) && defined(_LIBUNWIND_IS_BAREMETAL) +#define _LIBUNWIND_LOG0(msg) +#define _LIBUNWIND_LOG(msg, ...) +#else +#define _LIBUNWIND_LOG0(msg) \ + fprintf(stderr, "libunwind: " msg "\n") +#define _LIBUNWIND_LOG(msg, ...) \ + fprintf(stderr, "libunwind: " msg "\n", __VA_ARGS__) +#endif + +#if defined(NDEBUG) + #define _LIBUNWIND_LOG_IF_FALSE(x) x +#else + #define _LIBUNWIND_LOG_IF_FALSE(x) \ + do { \ + bool _ret = x; \ + if (!_ret) \ + _LIBUNWIND_LOG("" #x " failed in %s", __FUNCTION__); \ + } while (0) +#endif + +// Macros that define away in non-Debug builds +#ifdef NDEBUG + #define _LIBUNWIND_DEBUG_LOG(msg, ...) + #define _LIBUNWIND_TRACE_API(msg, ...) + #define _LIBUNWIND_TRACING_UNWINDING (0) + #define _LIBUNWIND_TRACING_DWARF (0) + #define _LIBUNWIND_TRACE_UNWINDING(msg, ...) + #define _LIBUNWIND_TRACE_DWARF(...) +#else + #ifdef __cplusplus + extern "C" { + #endif + extern bool logAPIs(); + extern bool logUnwinding(); + extern bool logDWARF(); + #ifdef __cplusplus + } + #endif + #define _LIBUNWIND_DEBUG_LOG(msg, ...) _LIBUNWIND_LOG(msg, __VA_ARGS__) + #define _LIBUNWIND_TRACE_API(msg, ...) \ + do { \ + if (logAPIs()) \ + _LIBUNWIND_LOG(msg, __VA_ARGS__); \ + } while (0) + #define _LIBUNWIND_TRACING_UNWINDING logUnwinding() + #define _LIBUNWIND_TRACING_DWARF logDWARF() + #define _LIBUNWIND_TRACE_UNWINDING(msg, ...) \ + do { \ + if (logUnwinding()) \ + _LIBUNWIND_LOG(msg, __VA_ARGS__); \ + } while (0) + #define _LIBUNWIND_TRACE_DWARF(...) \ + do { \ + if (logDWARF()) \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) +#endif + +#ifdef __cplusplus +// Used to fit UnwindCursor and Registers_xxx types against unw_context_t / +// unw_cursor_t sized memory blocks. 
+#if defined(_LIBUNWIND_IS_NATIVE_ONLY) +# define COMP_OP == +#else +# define COMP_OP <= +#endif +template +struct check_fit { + template + struct blk_count { + static const size_t count = + (sizeof(T) + sizeof(uint64_t) - 1) / sizeof(uint64_t); + }; + static const bool does_fit = + (blk_count<_Type>::count COMP_OP blk_count<_Mem>::count); +}; +#undef COMP_OP +#endif // __cplusplus + +#endif // LIBUNWIND_CONFIG_H diff --git a/src/coreclr/src/nativeaot/libunwind/src/dwarf2.h b/src/coreclr/src/nativeaot/libunwind/src/dwarf2.h new file mode 100644 index 0000000000000..40f0daf468059 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/dwarf2.h @@ -0,0 +1,239 @@ +//===------------------------------- dwarf2.h -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +/* + These constants were taken from version 3 of the DWARF standard, + which is Copyright (c) 2005 Free Standards Group, and + Copyright (c) 1992, 1993 UNIX International, Inc. +*/ + +#ifndef __DWARF2__ +#define __DWARF2__ + +// DWARF unwind instructions +enum { + DW_CFA_nop = 0x0, + DW_CFA_set_loc = 0x1, + DW_CFA_advance_loc1 = 0x2, + DW_CFA_advance_loc2 = 0x3, + DW_CFA_advance_loc4 = 0x4, + DW_CFA_offset_extended = 0x5, + DW_CFA_restore_extended = 0x6, + DW_CFA_undefined = 0x7, + DW_CFA_same_value = 0x8, + DW_CFA_register = 0x9, + DW_CFA_remember_state = 0xA, + DW_CFA_restore_state = 0xB, + DW_CFA_def_cfa = 0xC, + DW_CFA_def_cfa_register = 0xD, + DW_CFA_def_cfa_offset = 0xE, + DW_CFA_def_cfa_expression = 0xF, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + DW_CFA_advance_loc = 0x40, // high 2 bits are 0x1, lower 6 bits are delta + DW_CFA_offset = 0x80, // high 2 bits are 0x2, lower 6 bits are register + DW_CFA_restore = 0xC0, // high 2 bits are 0x3, lower 6 bits are register + + // GNU extensions + DW_CFA_GNU_window_save = 0x2D, + DW_CFA_GNU_args_size = 0x2E, + DW_CFA_GNU_negative_offset_extended = 0x2F, + + // AARCH64 extensions + DW_CFA_AARCH64_negate_ra_state = 0x2D +}; + + +// FSF exception handling Pointer-Encoding constants +// Used in CFI augmentation by GCC +enum { + DW_EH_PE_ptr = 0x00, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_signed = 0x08, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_absptr = 0x00, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + DW_EH_PE_indirect = 0x80, + DW_EH_PE_omit = 0xFF +}; + + +// DWARF expressions +enum { + DW_OP_addr = 0x03, // constant address (size target specific) + DW_OP_deref = 0x06, + DW_OP_const1u = 0x08, // 1-byte constant + DW_OP_const1s = 0x09, // 1-byte constant + DW_OP_const2u = 0x0A, // 2-byte constant + DW_OP_const2s = 0x0B, // 2-byte constant + DW_OP_const4u = 0x0C, // 4-byte constant + DW_OP_const4s = 0x0D, // 4-byte constant + DW_OP_const8u = 0x0E, // 8-byte constant + DW_OP_const8s = 0x0F, // 8-byte constant + DW_OP_constu = 0x10, // ULEB128 constant + DW_OP_consts = 0x11, // SLEB128 constant + 
DW_OP_dup = 0x12, + DW_OP_drop = 0x13, + DW_OP_over = 0x14, + DW_OP_pick = 0x15, // 1-byte stack index + DW_OP_swap = 0x16, + DW_OP_rot = 0x17, + DW_OP_xderef = 0x18, + DW_OP_abs = 0x19, + DW_OP_and = 0x1A, + DW_OP_div = 0x1B, + DW_OP_minus = 0x1C, + DW_OP_mod = 0x1D, + DW_OP_mul = 0x1E, + DW_OP_neg = 0x1F, + DW_OP_not = 0x20, + DW_OP_or = 0x21, + DW_OP_plus = 0x22, + DW_OP_plus_uconst = 0x23, // ULEB128 addend + DW_OP_shl = 0x24, + DW_OP_shr = 0x25, + DW_OP_shra = 0x26, + DW_OP_xor = 0x27, + DW_OP_skip = 0x2F, // signed 2-byte constant + DW_OP_bra = 0x28, // signed 2-byte constant + DW_OP_eq = 0x29, + DW_OP_ge = 0x2A, + DW_OP_gt = 0x2B, + DW_OP_le = 0x2C, + DW_OP_lt = 0x2D, + DW_OP_ne = 0x2E, + DW_OP_lit0 = 0x30, // Literal 0 + DW_OP_lit1 = 0x31, // Literal 1 + DW_OP_lit2 = 0x32, // Literal 2 + DW_OP_lit3 = 0x33, // Literal 3 + DW_OP_lit4 = 0x34, // Literal 4 + DW_OP_lit5 = 0x35, // Literal 5 + DW_OP_lit6 = 0x36, // Literal 6 + DW_OP_lit7 = 0x37, // Literal 7 + DW_OP_lit8 = 0x38, // Literal 8 + DW_OP_lit9 = 0x39, // Literal 9 + DW_OP_lit10 = 0x3A, // Literal 10 + DW_OP_lit11 = 0x3B, // Literal 11 + DW_OP_lit12 = 0x3C, // Literal 12 + DW_OP_lit13 = 0x3D, // Literal 13 + DW_OP_lit14 = 0x3E, // Literal 14 + DW_OP_lit15 = 0x3F, // Literal 15 + DW_OP_lit16 = 0x40, // Literal 16 + DW_OP_lit17 = 0x41, // Literal 17 + DW_OP_lit18 = 0x42, // Literal 18 + DW_OP_lit19 = 0x43, // Literal 19 + DW_OP_lit20 = 0x44, // Literal 20 + DW_OP_lit21 = 0x45, // Literal 21 + DW_OP_lit22 = 0x46, // Literal 22 + DW_OP_lit23 = 0x47, // Literal 23 + DW_OP_lit24 = 0x48, // Literal 24 + DW_OP_lit25 = 0x49, // Literal 25 + DW_OP_lit26 = 0x4A, // Literal 26 + DW_OP_lit27 = 0x4B, // Literal 27 + DW_OP_lit28 = 0x4C, // Literal 28 + DW_OP_lit29 = 0x4D, // Literal 29 + DW_OP_lit30 = 0x4E, // Literal 30 + DW_OP_lit31 = 0x4F, // Literal 31 + DW_OP_reg0 = 0x50, // Contents of reg0 + DW_OP_reg1 = 0x51, // Contents of reg1 + DW_OP_reg2 = 0x52, // Contents of reg2 + DW_OP_reg3 = 0x53, // Contents of reg3 + DW_OP_reg4 = 0x54, // Contents of reg4 + DW_OP_reg5 = 0x55, // Contents of reg5 + DW_OP_reg6 = 0x56, // Contents of reg6 + DW_OP_reg7 = 0x57, // Contents of reg7 + DW_OP_reg8 = 0x58, // Contents of reg8 + DW_OP_reg9 = 0x59, // Contents of reg9 + DW_OP_reg10 = 0x5A, // Contents of reg10 + DW_OP_reg11 = 0x5B, // Contents of reg11 + DW_OP_reg12 = 0x5C, // Contents of reg12 + DW_OP_reg13 = 0x5D, // Contents of reg13 + DW_OP_reg14 = 0x5E, // Contents of reg14 + DW_OP_reg15 = 0x5F, // Contents of reg15 + DW_OP_reg16 = 0x60, // Contents of reg16 + DW_OP_reg17 = 0x61, // Contents of reg17 + DW_OP_reg18 = 0x62, // Contents of reg18 + DW_OP_reg19 = 0x63, // Contents of reg19 + DW_OP_reg20 = 0x64, // Contents of reg20 + DW_OP_reg21 = 0x65, // Contents of reg21 + DW_OP_reg22 = 0x66, // Contents of reg22 + DW_OP_reg23 = 0x67, // Contents of reg23 + DW_OP_reg24 = 0x68, // Contents of reg24 + DW_OP_reg25 = 0x69, // Contents of reg25 + DW_OP_reg26 = 0x6A, // Contents of reg26 + DW_OP_reg27 = 0x6B, // Contents of reg27 + DW_OP_reg28 = 0x6C, // Contents of reg28 + DW_OP_reg29 = 0x6D, // Contents of reg29 + DW_OP_reg30 = 0x6E, // Contents of reg30 + DW_OP_reg31 = 0x6F, // Contents of reg31 + DW_OP_breg0 = 0x70, // base register 0 + SLEB128 offset + DW_OP_breg1 = 0x71, // base register 1 + SLEB128 offset + DW_OP_breg2 = 0x72, // base register 2 + SLEB128 offset + DW_OP_breg3 = 0x73, // base register 3 + SLEB128 offset + DW_OP_breg4 = 0x74, // base register 4 + SLEB128 offset + DW_OP_breg5 = 0x75, // base register 5 + SLEB128 offset + DW_OP_breg6 = 
0x76, // base register 6 + SLEB128 offset + DW_OP_breg7 = 0x77, // base register 7 + SLEB128 offset + DW_OP_breg8 = 0x78, // base register 8 + SLEB128 offset + DW_OP_breg9 = 0x79, // base register 9 + SLEB128 offset + DW_OP_breg10 = 0x7A, // base register 10 + SLEB128 offset + DW_OP_breg11 = 0x7B, // base register 11 + SLEB128 offset + DW_OP_breg12 = 0x7C, // base register 12 + SLEB128 offset + DW_OP_breg13 = 0x7D, // base register 13 + SLEB128 offset + DW_OP_breg14 = 0x7E, // base register 14 + SLEB128 offset + DW_OP_breg15 = 0x7F, // base register 15 + SLEB128 offset + DW_OP_breg16 = 0x80, // base register 16 + SLEB128 offset + DW_OP_breg17 = 0x81, // base register 17 + SLEB128 offset + DW_OP_breg18 = 0x82, // base register 18 + SLEB128 offset + DW_OP_breg19 = 0x83, // base register 19 + SLEB128 offset + DW_OP_breg20 = 0x84, // base register 20 + SLEB128 offset + DW_OP_breg21 = 0x85, // base register 21 + SLEB128 offset + DW_OP_breg22 = 0x86, // base register 22 + SLEB128 offset + DW_OP_breg23 = 0x87, // base register 23 + SLEB128 offset + DW_OP_breg24 = 0x88, // base register 24 + SLEB128 offset + DW_OP_breg25 = 0x89, // base register 25 + SLEB128 offset + DW_OP_breg26 = 0x8A, // base register 26 + SLEB128 offset + DW_OP_breg27 = 0x8B, // base register 27 + SLEB128 offset + DW_OP_breg28 = 0x8C, // base register 28 + SLEB128 offset + DW_OP_breg29 = 0x8D, // base register 29 + SLEB128 offset + DW_OP_breg30 = 0x8E, // base register 30 + SLEB128 offset + DW_OP_breg31 = 0x8F, // base register 31 + SLEB128 offset + DW_OP_regx = 0x90, // ULEB128 register + DW_OP_fbreg = 0x91, // SLEB128 offset + DW_OP_bregx = 0x92, // ULEB128 register followed by SLEB128 offset + DW_OP_piece = 0x93, // ULEB128 size of piece addressed + DW_OP_deref_size = 0x94, // 1-byte size of data retrieved + DW_OP_xderef_size = 0x95, // 1-byte size of data retrieved + DW_OP_nop = 0x96, + DW_OP_push_object_addres = 0x97, + DW_OP_call2 = 0x98, // 2-byte offset of DIE + DW_OP_call4 = 0x99, // 4-byte offset of DIE + DW_OP_call_ref = 0x9A, // 4- or 8-byte offset of DIE + DW_OP_lo_user = 0xE0, + DW_OP_APPLE_uninit = 0xF0, + DW_OP_hi_user = 0xFF +}; + + +#endif diff --git a/src/coreclr/src/nativeaot/libunwind/src/libunwind.cpp b/src/coreclr/src/nativeaot/libunwind/src/libunwind.cpp new file mode 100644 index 0000000000000..bc68033cd415c --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/libunwind.cpp @@ -0,0 +1,339 @@ +//===--------------------------- libunwind.cpp ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Implements unw_* functions from +// +//===----------------------------------------------------------------------===// + +#include + +#include "libunwind_ext.h" +#include "config.h" + +#include + + +#if !defined(__USING_SJLJ_EXCEPTIONS__) +#include "AddressSpace.hpp" +#include "UnwindCursor.hpp" + +using namespace libunwind; + +/// internal object to represent this processes address space +LocalAddressSpace LocalAddressSpace::sThisAddressSpace; + +_LIBUNWIND_EXPORT unw_addr_space_t unw_local_addr_space = + (unw_addr_space_t)&LocalAddressSpace::sThisAddressSpace; + +/// Create a cursor of a thread in this process given 'context' recorded by +/// __unw_getcontext(). 
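// Typical caller-side pattern, shown only as a hedged sketch (the test sources
// later in this patch, e.g. libunwind_01.pass.cpp, follow the same shape):
// capture the current register state with unw_getcontext, wrap it in a cursor
// with unw_init_local, then walk the stack with unw_step until it returns 0
// (UNW_STEP_END) or an error.
//
//   unw_context_t context;
//   unw_cursor_t cursor;
//   unw_getcontext(&context);
//   unw_init_local(&cursor, &context);
//   while (unw_step(&cursor) > 0) {
//     unw_word_t ip;
//     unw_get_reg(&cursor, UNW_REG_IP, &ip);
//     // inspect the frame at `ip` ...
//   }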
+_LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, + unw_context_t *context) { + _LIBUNWIND_TRACE_API("__unw_init_local(cursor=%p, context=%p)", + static_cast(cursor), + static_cast(context)); +#if defined(__i386__) +# define REGISTER_KIND Registers_x86 +#elif defined(__x86_64__) +# define REGISTER_KIND Registers_x86_64 +#elif defined(__powerpc64__) +# define REGISTER_KIND Registers_ppc64 +#elif defined(__ppc__) +# define REGISTER_KIND Registers_ppc +#elif defined(__aarch64__) +# define REGISTER_KIND Registers_arm64 +#elif defined(__arm__) +# define REGISTER_KIND Registers_arm +#elif defined(__or1k__) +# define REGISTER_KIND Registers_or1k +#elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32 +# define REGISTER_KIND Registers_mips_o32 +#elif defined(__mips64) +# define REGISTER_KIND Registers_mips_newabi +#elif defined(__mips__) +# warning The MIPS architecture is not supported with this ABI and environment! +#elif defined(__sparc__) +# define REGISTER_KIND Registers_sparc +#else +# error Architecture not supported +#endif + // Use "placement new" to allocate UnwindCursor in the cursor buffer. + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); +#undef REGISTER_KIND + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->setInfoBasedOnIPRegister(); + + return UNW_ESUCCESS; +} +_LIBUNWIND_WEAK_ALIAS(__unw_init_local, unw_init_local) + +/// Get value of specified register at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_word_t *value) { + _LIBUNWIND_TRACE_API("__unw_get_reg(cursor=%p, regNum=%d, &value=%p)", + static_cast(cursor), regNum, + static_cast(value)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validReg(regNum)) { + *value = co->getReg(regNum); + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} +_LIBUNWIND_WEAK_ALIAS(__unw_get_reg, unw_get_reg) + +/// Set value of specified register at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_word_t value, unw_word_t *pos) { + _LIBUNWIND_TRACE_API("__unw_set_reg(cursor=%p, regNum=%d, value=0x%" PRIxPTR + ")", + static_cast(cursor), regNum, (long long)value); + typedef LocalAddressSpace::pint_t pint_t; + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validReg(regNum)) { + co->setReg(regNum, (pint_t)value, (pint_t)pos); + // special case altering IP to re-find info (being called by personality + // function) + if (regNum == UNW_REG_IP) { + unw_proc_info_t info; + // First, get the FDE for the old location and then update it. + co->getInfo(&info); + co->setInfoBasedOnIPRegister(false); + // If the original call expects stack adjustment, perform this now. + // Normal frame unwinding would have included the offset already in the + // CFA computation. + // Note: for PA-RISC and other platforms where the stack grows up, + // this should actually be - info.gp. LLVM doesn't currently support + // any such platforms and Clang doesn't export a macro for them. + if (info.gp) + co->setReg(UNW_REG_SP, co->getReg(UNW_REG_SP) + info.gp, 0); + } + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} +_LIBUNWIND_WEAK_ALIAS(__unw_set_reg, unw_set_reg) + +/// Get value of specified float register at cursor position in stack frame. 
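// Hedged usage sketch (illustrative only; `cursor` and `regNum` are assumed to
// exist in the caller): code that wants a floating-point value first asks
// whether the register number names a float register, then reads it through
// the public alias of the accessor below.
//
//   unw_fpreg_t fp;
//   if (unw_is_fpreg(&cursor, regNum) &&
//       unw_get_fpreg(&cursor, regNum, &fp) == UNW_ESUCCESS) {
//     // use fp ...
//   }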
+_LIBUNWIND_HIDDEN int __unw_get_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_fpreg_t *value) { + _LIBUNWIND_TRACE_API("__unw_get_fpreg(cursor=%p, regNum=%d, &value=%p)", + static_cast(cursor), regNum, + static_cast(value)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validFloatReg(regNum)) { + *value = co->getFloatReg(regNum); + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} +_LIBUNWIND_WEAK_ALIAS(__unw_get_fpreg, unw_get_fpreg) + +/// Set value of specified float register at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_set_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_fpreg_t value) { +#if defined(_LIBUNWIND_ARM_EHABI) + _LIBUNWIND_TRACE_API("__unw_set_fpreg(cursor=%p, regNum=%d, value=%llX)", + static_cast(cursor), regNum, value); +#else + _LIBUNWIND_TRACE_API("__unw_set_fpreg(cursor=%p, regNum=%d, value=%g)", + static_cast(cursor), regNum, value); +#endif + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validFloatReg(regNum)) { + co->setFloatReg(regNum, value); + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} +_LIBUNWIND_WEAK_ALIAS(__unw_set_fpreg, unw_set_fpreg) + +/// Get location of specified register at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_get_save_loc(unw_cursor_t *cursor, int regNum, + unw_save_loc_t* location) +{ + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->validReg(regNum)) { + // We only support memory locations, not register locations + location->u.addr = co->getRegLocation(regNum); + location->type = (location->u.addr == 0) ? UNW_SLT_NONE : UNW_SLT_MEMORY; + return UNW_ESUCCESS; + } + return UNW_EBADREG; +} +_LIBUNWIND_WEAK_ALIAS(__unw_get_save_loc, unw_get_save_loc) + +/// Move cursor to next frame. +_LIBUNWIND_HIDDEN int __unw_step(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_step(cursor=%p)", static_cast(cursor)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->step(); +} +_LIBUNWIND_WEAK_ALIAS(__unw_step, unw_step) + +/// Get unwind info at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_get_proc_info(unw_cursor_t *cursor, + unw_proc_info_t *info) { + _LIBUNWIND_TRACE_API("__unw_get_proc_info(cursor=%p, &info=%p)", + static_cast(cursor), static_cast(info)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->getInfo(info); + if (info->end_ip == 0) + return UNW_ENOINFO; + else + return UNW_ESUCCESS; +} +_LIBUNWIND_WEAK_ALIAS(__unw_get_proc_info, unw_get_proc_info) + +/// Resume execution at cursor position (aka longjump). +_LIBUNWIND_HIDDEN int __unw_resume(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p)", static_cast(cursor)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->jumpto(); + return UNW_EUNSPEC; +} +_LIBUNWIND_WEAK_ALIAS(__unw_resume, unw_resume) + +/// Get name of function at cursor position in stack frame. +_LIBUNWIND_HIDDEN int __unw_get_proc_name(unw_cursor_t *cursor, char *buf, + size_t bufLen, unw_word_t *offset) { + _LIBUNWIND_TRACE_API("__unw_get_proc_name(cursor=%p, &buf=%p, bufLen=%lu)", + static_cast(cursor), static_cast(buf), + static_cast(bufLen)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + if (co->getFunctionName(buf, bufLen, offset)) + return UNW_ESUCCESS; + else + return UNW_EUNSPEC; +} +_LIBUNWIND_WEAK_ALIAS(__unw_get_proc_name, unw_get_proc_name) + +/// Checks if a register is a floating-point register. 
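// Hedged sketch of using the unw_get_save_loc alias defined above (as its
// implementation notes, only memory locations are reported): a stack walker
// can ask where a register was saved in the current frame and then read or
// update that slot directly. Variable names are illustrative assumptions, and
// whether a location exists depends on the frame being examined.
//
//   unw_save_loc_t loc;
//   if (unw_get_save_loc(&cursor, UNW_REG_IP, &loc) == UNW_ESUCCESS &&
//       loc.type == UNW_SLT_MEMORY) {
//     void **slot = (void **)loc.u.addr;  // address holding the saved value
//     // inspect or update *slot ...
//   }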
+_LIBUNWIND_HIDDEN int __unw_is_fpreg(unw_cursor_t *cursor, + unw_regnum_t regNum) { + _LIBUNWIND_TRACE_API("__unw_is_fpreg(cursor=%p, regNum=%d)", + static_cast(cursor), regNum); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->validFloatReg(regNum); +} +_LIBUNWIND_WEAK_ALIAS(__unw_is_fpreg, unw_is_fpreg) + +/// Checks if a register is a floating-point register. +_LIBUNWIND_HIDDEN const char *__unw_regname(unw_cursor_t *cursor, + unw_regnum_t regNum) { + _LIBUNWIND_TRACE_API("__unw_regname(cursor=%p, regNum=%d)", + static_cast(cursor), regNum); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->getRegisterName(regNum); +} +_LIBUNWIND_WEAK_ALIAS(__unw_regname, unw_regname) + +/// Checks if current frame is signal trampoline. +_LIBUNWIND_HIDDEN int __unw_is_signal_frame(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_is_signal_frame(cursor=%p)", + static_cast(cursor)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->isSignalFrame(); +} +_LIBUNWIND_WEAK_ALIAS(__unw_is_signal_frame, unw_is_signal_frame) + +#ifdef __arm__ +// Save VFP registers d0-d15 using FSTMIADX instead of FSTMIADD +_LIBUNWIND_HIDDEN void __unw_save_vfp_as_X(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_get_fpreg_save_vfp_as_X(cursor=%p)", + static_cast(cursor)); + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + return co->saveVFPAsX(); +} +_LIBUNWIND_WEAK_ALIAS(__unw_save_vfp_as_X, unw_save_vfp_as_X) +#endif + + +#if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +/// SPI: walks cached DWARF entries +_LIBUNWIND_HIDDEN void __unw_iterate_dwarf_unwind_cache(void (*func)( + unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) { + _LIBUNWIND_TRACE_API("__unw_iterate_dwarf_unwind_cache(func=%p)", + reinterpret_cast(func)); + DwarfFDECache::iterateCacheEntries(func); +} +_LIBUNWIND_WEAK_ALIAS(__unw_iterate_dwarf_unwind_cache, + unw_iterate_dwarf_unwind_cache) + +/// IPI: for __register_frame() +void __unw_add_dynamic_fde(unw_word_t fde) { + CFI_Parser::FDE_Info fdeInfo; + CFI_Parser::CIE_Info cieInfo; + const char *message = CFI_Parser::decodeFDE( + LocalAddressSpace::sThisAddressSpace, + (LocalAddressSpace::pint_t) fde, &fdeInfo, &cieInfo); + if (message == NULL) { + // dynamically registered FDEs don't have a mach_header group they are in. 
+ // Use fde as mh_group + unw_word_t mh_group = fdeInfo.fdeStart; + DwarfFDECache::add((LocalAddressSpace::pint_t)mh_group, + fdeInfo.pcStart, fdeInfo.pcEnd, + fdeInfo.fdeStart); + } else { + _LIBUNWIND_DEBUG_LOG("__unw_add_dynamic_fde: bad fde: %s", message); + } +} + +/// IPI: for __deregister_frame() +void __unw_remove_dynamic_fde(unw_word_t fde) { + // fde is own mh_group + DwarfFDECache::removeAllIn((LocalAddressSpace::pint_t)fde); +} +#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#endif // !defined(__USING_SJLJ_EXCEPTIONS__) + + + +// Add logging hooks in Debug builds only +#ifndef NDEBUG +#include + +_LIBUNWIND_HIDDEN +bool logAPIs() { + // do manual lock to avoid use of _cxa_guard_acquire or initializers + static bool checked = false; + static bool log = false; + if (!checked) { + log = (getenv("LIBUNWIND_PRINT_APIS") != NULL); + checked = true; + } + return log; +} + +_LIBUNWIND_HIDDEN +bool logUnwinding() { + // do manual lock to avoid use of _cxa_guard_acquire or initializers + static bool checked = false; + static bool log = false; + if (!checked) { + log = (getenv("LIBUNWIND_PRINT_UNWINDING") != NULL); + checked = true; + } + return log; +} + +_LIBUNWIND_HIDDEN +bool logDWARF() { + // do manual lock to avoid use of _cxa_guard_acquire or initializers + static bool checked = false; + static bool log = false; + if (!checked) { + log = (getenv("LIBUNWIND_PRINT_DWARF") != NULL); + checked = true; + } + return log; +} + +#endif // NDEBUG + diff --git a/src/coreclr/src/nativeaot/libunwind/src/libunwind_ext.h b/src/coreclr/src/nativeaot/libunwind/src/libunwind_ext.h new file mode 100644 index 0000000000000..b240ba7fbcacd --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/libunwind_ext.h @@ -0,0 +1,66 @@ +//===------------------------ libunwind_ext.h -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// Extensions to libunwind API. +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBUNWIND_EXT__ +#define __LIBUNWIND_EXT__ + +#include "config.h" +#include +#include + +#define UNW_STEP_SUCCESS 1 +#define UNW_STEP_END 0 + +#ifdef __cplusplus +extern "C" { +#endif + +extern int __unw_getcontext(unw_context_t *); +extern int __unw_init_local(unw_cursor_t *, unw_context_t *); +extern int __unw_step(unw_cursor_t *); +extern int __unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *); +extern int __unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *); +extern int __unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t, unw_word_t *); +extern int __unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t); +extern int __unw_resume(unw_cursor_t *); + +#ifdef __arm__ +/* Save VFP registers in FSTMX format (instead of FSTMD). 
*/ +extern void __unw_save_vfp_as_X(unw_cursor_t *); +#endif + +extern const char *__unw_regname(unw_cursor_t *, unw_regnum_t); +extern int __unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *); +extern int __unw_is_fpreg(unw_cursor_t *, unw_regnum_t); +extern int __unw_is_signal_frame(unw_cursor_t *); +extern int __unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *); +extern int __unw_get_save_loc(unw_cursor_t *, int, unw_save_loc_t *); + +// SPI +extern void __unw_iterate_dwarf_unwind_cache(void (*func)( + unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)); + +// IPI +extern void __unw_add_dynamic_fde(unw_word_t fde); +extern void __unw_remove_dynamic_fde(unw_word_t fde); + +#if defined(_LIBUNWIND_ARM_EHABI) +extern const uint32_t* decode_eht_entry(const uint32_t*, size_t*, size_t*); +extern _Unwind_Reason_Code _Unwind_VRS_Interpret(_Unwind_Context *context, + const uint32_t *data, + size_t offset, size_t len); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // __LIBUNWIND_EXT__ diff --git a/src/coreclr/src/nativeaot/libunwind/src/unwind_ext.h b/src/coreclr/src/nativeaot/libunwind/src/unwind_ext.h new file mode 100644 index 0000000000000..c40ce6a1610f4 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/src/unwind_ext.h @@ -0,0 +1,37 @@ +//===-------------------------- unwind_ext.h ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +// +// Extensions to unwind API. +// +//===----------------------------------------------------------------------===// + +#ifndef __UNWIND_EXT__ +#define __UNWIND_EXT__ + +#include "unwind.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// These platform specific functions to get and set the top context are +// implemented elsewhere. 
+ +extern struct _Unwind_FunctionContext * +__Unwind_SjLj_GetTopOfFunctionStack(); + +extern void +__Unwind_SjLj_SetTopOfFunctionStack(struct _Unwind_FunctionContext *fc); + +#ifdef __cplusplus +} +#endif + +#endif // __UNWIND_EXT__ + + diff --git a/src/coreclr/src/nativeaot/libunwind/test/CMakeLists.txt b/src/coreclr/src/nativeaot/libunwind/test/CMakeLists.txt new file mode 100644 index 0000000000000..d902e3e829410 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/CMakeLists.txt @@ -0,0 +1,35 @@ +include(AddLLVM) # for add_lit_testsuite +macro(pythonize_bool var) + if (${var}) + set(${var} True) + else() + set(${var} False) + endif() +endmacro() + +if (NOT DEFINED LIBCXX_ENABLE_SHARED) + set(LIBCXX_ENABLE_SHARED ON) +endif() + +pythonize_bool(LIBUNWIND_BUILD_32_BITS) +pythonize_bool(LIBCXX_ENABLE_SHARED) +pythonize_bool(LIBUNWIND_ENABLE_SHARED) +pythonize_bool(LIBUNWIND_ENABLE_THREADS) +pythonize_bool(LIBUNWIND_ENABLE_EXCEPTIONS) +pythonize_bool(LIBUNWIND_USE_COMPILER_RT) +pythonize_bool(LIBUNWIND_BUILD_EXTERNAL_THREAD_LIBRARY) +set(LIBUNWIND_TARGET_INFO "libcxx.test.target_info.LocalTI" CACHE STRING + "TargetInfo to use when setting up test environment.") +set(LIBUNWIND_EXECUTOR "None" CACHE STRING + "Executor to use when running tests.") + +set(AUTO_GEN_COMMENT "## Autogenerated by libunwind configuration.\n# Do not edit!") +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg + @ONLY) + +add_lit_testsuite(check-unwind "Running libunwind tests" + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${LIBUNWIND_TEST_DEPS} + ) diff --git a/src/coreclr/src/nativeaot/libunwind/test/alignment.pass.cpp b/src/coreclr/src/nativeaot/libunwind/test/alignment.pass.cpp new file mode 100644 index 0000000000000..b0da7f1551346 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/alignment.pass.cpp @@ -0,0 +1,28 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// The Itanium ABI requires that _Unwind_Exception objects are "double-word +// aligned". 
+ +#include + +// EHABI : 8-byte aligned +// itanium: largest supported alignment for the system +#if defined(_LIBUNWIND_ARM_EHABI) +static_assert(alignof(_Unwind_Control_Block) == 8, + "_Unwind_Control_Block must be double-word aligned"); +#else +struct MaxAligned {} __attribute__((__aligned__)); +static_assert(alignof(_Unwind_Exception) == alignof(MaxAligned), + "_Unwind_Exception must be maximally aligned"); +#endif + +int main() +{ +} diff --git a/src/coreclr/src/nativeaot/libunwind/test/libunwind/__init__.py b/src/coreclr/src/nativeaot/libunwind/test/libunwind/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/src/coreclr/src/nativeaot/libunwind/test/libunwind/test/__init__.py b/src/coreclr/src/nativeaot/libunwind/test/libunwind/test/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/src/coreclr/src/nativeaot/libunwind/test/libunwind/test/config.py b/src/coreclr/src/nativeaot/libunwind/test/libunwind/test/config.py new file mode 100644 index 0000000000000..05e3f3cc21f31 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/libunwind/test/config.py @@ -0,0 +1,68 @@ +#===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===----------------------------------------------------------------------===## +import os +import sys + +from libcxx.test.config import Configuration as LibcxxConfiguration + + +class Configuration(LibcxxConfiguration): + # pylint: disable=redefined-outer-name + def __init__(self, lit_config, config): + super(Configuration, self).__init__(lit_config, config) + self.libunwind_src_root = None + self.libunwind_obj_root = None + self.abi_library_path = None + self.libcxx_src_root = None + + def configure_src_root(self): + self.libunwind_src_root = (self.get_lit_conf('libunwind_src_root') + or os.path.dirname(self.config.test_source_root)) + self.libcxx_src_root = (self.get_lit_conf('libcxx_src_root') + or os.path.join(self.libunwind_src_root, '..', 'libcxx')) + + def configure_obj_root(self): + self.libunwind_obj_root = self.get_lit_conf('libunwind_obj_root') + super(Configuration, self).configure_obj_root() + + def has_cpp_feature(self, feature, required_value): + return int(self.cxx.dumpMacros().get('__cpp_' + feature, 0)) >= required_value + + def configure_features(self): + super(Configuration, self).configure_features() + if not self.get_lit_bool('enable_exceptions', True): + self.config.available_features.add('libcxxabi-no-exceptions') + + def configure_compile_flags(self): + self.cxx.compile_flags += ['-DLIBUNWIND_NO_TIMER'] + if not self.get_lit_bool('enable_exceptions', True): + self.cxx.compile_flags += ['-fno-exceptions', '-DLIBUNWIND_HAS_NO_EXCEPTIONS'] + # Stack unwinding tests need unwinding tables and these are not + # generated by default on all Targets. 
+ self.cxx.compile_flags += ['-funwind-tables'] + if not self.get_lit_bool('enable_threads', True): + self.cxx.compile_flags += ['-D_LIBUNWIND_HAS_NO_THREADS'] + self.config.available_features.add('libunwind-no-threads') + super(Configuration, self).configure_compile_flags() + + def configure_compile_flags_header_includes(self): + self.configure_config_site_header() + + libunwind_headers = self.get_lit_conf( + 'libunwind_headers', + os.path.join(self.libunwind_src_root, 'include')) + if not os.path.isdir(libunwind_headers): + self.lit_config.fatal("libunwind_headers='%s' is not a directory." + % libunwind_headers) + self.cxx.compile_flags += ['-I' + libunwind_headers] + + def configure_compile_flags_exceptions(self): + pass + + def configure_compile_flags_rtti(self): + pass diff --git a/src/coreclr/src/nativeaot/libunwind/test/libunwind_01.pass.cpp b/src/coreclr/src/nativeaot/libunwind/test/libunwind_01.pass.cpp new file mode 100644 index 0000000000000..6957d98f956d7 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/libunwind_01.pass.cpp @@ -0,0 +1,42 @@ +#include +#include + +void backtrace(int lower_bound) { + unw_context_t context; + unw_getcontext(&context); + + unw_cursor_t cursor; + unw_init_local(&cursor, &context); + + int n = 0; + do { + ++n; + if (n > 100) { + abort(); + } + } while (unw_step(&cursor) > 0); + + if (n < lower_bound) { + abort(); + } +} + +void test1(int i) { + backtrace(i); +} + +void test2(int i, int j) { + backtrace(i); + test1(j); +} + +void test3(int i, int j, int k) { + backtrace(i); + test2(j, k); +} + +int main() { + test1(1); + test2(1, 2); + test3(1, 2, 3); +} diff --git a/src/coreclr/src/nativeaot/libunwind/test/libunwind_02.pass.cpp b/src/coreclr/src/nativeaot/libunwind/test/libunwind_02.pass.cpp new file mode 100644 index 0000000000000..a0efd1df79fa4 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/libunwind_02.pass.cpp @@ -0,0 +1,38 @@ +#include +#include +#include + +#define EXPECTED_NUM_FRAMES 50 +#define NUM_FRAMES_UPPER_BOUND 100 + +_Unwind_Reason_Code callback(_Unwind_Context *context, void *cnt) { + (void)context; + int *i = (int *)cnt; + ++*i; + if (*i > NUM_FRAMES_UPPER_BOUND) { + abort(); + } + return _URC_NO_REASON; +} + +void test_backtrace() { + int n = 0; + _Unwind_Backtrace(&callback, &n); + if (n < EXPECTED_NUM_FRAMES) { + abort(); + } +} + +int test(int i) { + if (i == 0) { + test_backtrace(); + return 0; + } else { + return i + test(i - 1); + } +} + +int main() { + int total = test(50); + assert(total == 1275); +} diff --git a/src/coreclr/src/nativeaot/libunwind/test/lit.cfg b/src/coreclr/src/nativeaot/libunwind/test/lit.cfg new file mode 100644 index 0000000000000..1d284bdfd771a --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/lit.cfg @@ -0,0 +1,70 @@ +# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: + +# Configuration file for the 'lit' test runner. + + +import os +import site + +site.addsitedir(os.path.dirname(__file__)) + + +# Tell pylint that we know config and lit_config exist somewhere. +if 'PYLINT_IMPORT' in os.environ: + config = object() + lit_config = object() + +# name: The name of this test suite. +config.name = 'libunwind' + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = ['.cpp', '.s'] + +# test_source_root: The root path where tests are located. 
+config.test_source_root = os.path.dirname(__file__) + +# needed to test libunwind with code that throws exceptions +config.enable_exceptions = True + +# Infer the libcxx_test_source_root for configuration import. +# If libcxx_source_root isn't specified in the config, assume that the libcxx +# and libunwind source directories are sibling directories. +libcxx_src_root = getattr(config, 'libcxx_src_root', None) +if not libcxx_src_root: + libcxx_src_root = os.path.join(config.test_source_root, '../../libcxx') +libcxx_test_src_root = os.path.join(libcxx_src_root, 'utils') +if os.path.isfile(os.path.join(libcxx_test_src_root, 'libcxx', '__init__.py')): + site.addsitedir(libcxx_test_src_root) +else: + lit_config.fatal('Could not find libcxx test directory for test imports' + ' in: %s' % libcxx_test_src_root) + +# Infer the test_exec_root from the libcxx_object root. +obj_root = getattr(config, 'libunwind_obj_root', None) + +# Check that the test exec root is known. +if obj_root is None: + import libcxx.test.config + libcxx.test.config.loadSiteConfig( + lit_config, config, 'libunwind_site_config', 'LIBUNWIND_SITE_CONFIG') + obj_root = getattr(config, 'libunwind_obj_root', None) + if obj_root is None: + import tempfile + obj_root = tempfile.mkdtemp(prefix='libunwind-testsuite-') + lit_config.warning('Creating temporary directory for object root: %s' % + obj_root) + +config.test_exec_root = os.path.join(obj_root, 'test') + +cfg_variant = getattr(config, 'configuration_variant', 'libunwind') +if cfg_variant: + lit_config.note('Using configuration variant: %s' % cfg_variant) + +# Load the Configuration class from the module name .test.config. +config_module_name = '.'.join([cfg_variant, 'test', 'config']) +config_module = __import__(config_module_name, fromlist=['Configuration']) + +configuration = config_module.Configuration(lit_config, config) +configuration.configure() +configuration.print_config_info() +config.test_format = configuration.get_test_format() diff --git a/src/coreclr/src/nativeaot/libunwind/test/lit.site.cfg.in b/src/coreclr/src/nativeaot/libunwind/test/lit.site.cfg.in new file mode 100644 index 0000000000000..34da72ac10684 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/lit.site.cfg.in @@ -0,0 +1,30 @@ +@AUTO_GEN_COMMENT@ +config.cxx_under_test = "@LIBUNWIND_COMPILER@" +config.project_obj_root = "@CMAKE_BINARY_DIR@" +config.libunwind_src_root = "@LIBUNWIND_SOURCE_DIR@" +config.libunwind_obj_root = "@LIBUNWIND_BINARY_DIR@" +config.abi_library_path = "@LIBUNWIND_LIBRARY_DIR@" +config.libcxx_src_root = "@LIBUNWIND_LIBCXX_PATH@" +config.libunwind_headers = "@LIBUNWIND_SOURCE_DIR@/include" +config.cxx_library_root = "@LIBUNWIND_LIBCXX_LIBRARY_PATH@" +config.llvm_unwinder = True +config.builtins_library = "@LIBUNWIND_BUILTINS_LIBRARY@" +config.enable_threads = @LIBUNWIND_ENABLE_THREADS@ +config.use_sanitizer = "@LLVM_USE_SANITIZER@" +config.enable_32bit = @LIBUNWIND_BUILD_32_BITS@ +config.target_info = "@LIBUNWIND_TARGET_INFO@" +config.test_linker_flags = "@LIBUNWIND_TEST_LINKER_FLAGS@" +config.test_compiler_flags = "@LIBUNWIND_TEST_COMPILER_FLAGS@" +config.executor = "@LIBUNWIND_EXECUTOR@" +config.libunwind_shared = @LIBUNWIND_ENABLE_SHARED@ +config.enable_shared = @LIBCXX_ENABLE_SHARED@ +config.enable_exceptions = @LIBUNWIND_ENABLE_EXCEPTIONS@ +config.host_triple = "@LLVM_HOST_TRIPLE@" +config.target_triple = "@TARGET_TRIPLE@" +config.use_target = bool("@LIBUNWIND_TARGET_TRIPLE@") +config.sysroot = "@LIBUNWIND_SYSROOT@" +config.gcc_toolchain = "@LIBUNWIND_GCC_TOOLCHAIN@" 
+config.cxx_ext_threads = @LIBUNWIND_BUILD_EXTERNAL_THREAD_LIBRARY@ + +# Let the main config do the real work. +lit_config.load_config(config, "@LIBUNWIND_SOURCE_DIR@/test/lit.cfg") diff --git a/src/coreclr/src/nativeaot/libunwind/test/unw_getcontext.pass.cpp b/src/coreclr/src/nativeaot/libunwind/test/unw_getcontext.pass.cpp new file mode 100644 index 0000000000000..b012706a0bf92 --- /dev/null +++ b/src/coreclr/src/nativeaot/libunwind/test/unw_getcontext.pass.cpp @@ -0,0 +1,8 @@ +#include +#include + +int main() { + unw_context_t context; + int ret = unw_getcontext(&context); + assert(ret == UNW_ESUCCESS); +}
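// A further hedged sketch in the style of the test above (not part of the
// patch): once unw_getcontext succeeds, the context can be wrapped in a cursor
// and the current instruction pointer read back, exercising unw_init_local and
// unw_get_reg together.
//
//   unw_context_t context;
//   unw_cursor_t cursor;
//   assert(unw_getcontext(&context) == UNW_ESUCCESS);
//   assert(unw_init_local(&cursor, &context) == UNW_ESUCCESS);
//   unw_word_t ip = 0;
//   assert(unw_get_reg(&cursor, UNW_REG_IP, &ip) == UNW_ESUCCESS);
//   assert(ip != 0);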