Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the CPUID and XSAVE logic for APX #104637

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/design/features/xarch-apx.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# APX Integration in .NET

This document collects documentation and notes on APX integration in .NET. It will evolve as necessary.
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ class AsmOffsets
// Debug build offsets
#if TARGET_AMD64
#if TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0x1a90;
public const int OFFSETOF__REGDISPLAY__SP = 0x1a78;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1a80;
public const int SIZEOF__REGDISPLAY = 0x1b90;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how these values are calculated; I updated them based on the error message, so I want to double-check with the reviewers.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These constants are the values that sizeof(...) / offsetof(...) evaluate to during C/C++ compilation. The offsets above are verified against the real values during the C/C++ compilation phase, so if the runtime build passes, you are safe.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation!

public const int OFFSETOF__REGDISPLAY__SP = 0x1b78;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1b80;
#else // TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0xbf0;
public const int OFFSETOF__REGDISPLAY__SP = 0xbd8;
Expand Down Expand Up @@ -68,9 +68,9 @@ class AsmOffsets
// Release build offsets
#if TARGET_AMD64
#if TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0x1a80;
public const int OFFSETOF__REGDISPLAY__SP = 0x1a70;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1a78;
public const int SIZEOF__REGDISPLAY = 0x1b80;
public const int OFFSETOF__REGDISPLAY__SP = 0x1b70;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1b78;
#else // TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0xbf0;
public const int OFFSETOF__REGDISPLAY__SP = 0xbd0;
Expand Down Expand Up @@ -120,7 +120,7 @@ class AsmOffsets

#if TARGET_AMD64
#if TARGET_UNIX
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xc20;
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xca0;
#else // TARGET_UNIX
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0;
#endif // TARGET_UNIX
Expand Down
146 changes: 82 additions & 64 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,38 +81,40 @@ enum CORINFO_InstructionSet
InstructionSet_VectorT128=36,
InstructionSet_VectorT256=37,
InstructionSet_VectorT512=38,
InstructionSet_X86Base_X64=39,
InstructionSet_SSE_X64=40,
InstructionSet_SSE2_X64=41,
InstructionSet_SSE3_X64=42,
InstructionSet_SSSE3_X64=43,
InstructionSet_SSE41_X64=44,
InstructionSet_SSE42_X64=45,
InstructionSet_AVX_X64=46,
InstructionSet_AVX2_X64=47,
InstructionSet_AES_X64=48,
InstructionSet_BMI1_X64=49,
InstructionSet_BMI2_X64=50,
InstructionSet_FMA_X64=51,
InstructionSet_LZCNT_X64=52,
InstructionSet_PCLMULQDQ_X64=53,
InstructionSet_POPCNT_X64=54,
InstructionSet_AVXVNNI_X64=55,
InstructionSet_MOVBE_X64=56,
InstructionSet_X86Serialize_X64=57,
InstructionSet_EVEX_X64=58,
InstructionSet_AVX512F_X64=59,
InstructionSet_AVX512F_VL_X64=60,
InstructionSet_AVX512BW_X64=61,
InstructionSet_AVX512BW_VL_X64=62,
InstructionSet_AVX512CD_X64=63,
InstructionSet_AVX512CD_VL_X64=64,
InstructionSet_AVX512DQ_X64=65,
InstructionSet_AVX512DQ_VL_X64=66,
InstructionSet_AVX512VBMI_X64=67,
InstructionSet_AVX512VBMI_VL_X64=68,
InstructionSet_AVX10v1_X64=69,
InstructionSet_AVX10v1_V512_X64=70,
InstructionSet_APX=39,
InstructionSet_X86Base_X64=40,
InstructionSet_SSE_X64=41,
InstructionSet_SSE2_X64=42,
InstructionSet_SSE3_X64=43,
InstructionSet_SSSE3_X64=44,
InstructionSet_SSE41_X64=45,
InstructionSet_SSE42_X64=46,
InstructionSet_AVX_X64=47,
InstructionSet_AVX2_X64=48,
InstructionSet_AES_X64=49,
InstructionSet_BMI1_X64=50,
InstructionSet_BMI2_X64=51,
InstructionSet_FMA_X64=52,
InstructionSet_LZCNT_X64=53,
InstructionSet_PCLMULQDQ_X64=54,
InstructionSet_POPCNT_X64=55,
InstructionSet_AVXVNNI_X64=56,
InstructionSet_MOVBE_X64=57,
InstructionSet_X86Serialize_X64=58,
InstructionSet_EVEX_X64=59,
InstructionSet_AVX512F_X64=60,
InstructionSet_AVX512F_VL_X64=61,
InstructionSet_AVX512BW_X64=62,
InstructionSet_AVX512BW_VL_X64=63,
InstructionSet_AVX512CD_X64=64,
InstructionSet_AVX512CD_VL_X64=65,
InstructionSet_AVX512DQ_X64=66,
InstructionSet_AVX512DQ_VL_X64=67,
InstructionSet_AVX512VBMI_X64=68,
InstructionSet_AVX512VBMI_VL_X64=69,
InstructionSet_AVX10v1_X64=70,
InstructionSet_AVX10v1_V512_X64=71,
InstructionSet_APX_X64=72,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
Expand Down Expand Up @@ -153,38 +155,40 @@ enum CORINFO_InstructionSet
InstructionSet_VectorT128=36,
InstructionSet_VectorT256=37,
InstructionSet_VectorT512=38,
InstructionSet_X86Base_X64=39,
InstructionSet_SSE_X64=40,
InstructionSet_SSE2_X64=41,
InstructionSet_SSE3_X64=42,
InstructionSet_SSSE3_X64=43,
InstructionSet_SSE41_X64=44,
InstructionSet_SSE42_X64=45,
InstructionSet_AVX_X64=46,
InstructionSet_AVX2_X64=47,
InstructionSet_AES_X64=48,
InstructionSet_BMI1_X64=49,
InstructionSet_BMI2_X64=50,
InstructionSet_FMA_X64=51,
InstructionSet_LZCNT_X64=52,
InstructionSet_PCLMULQDQ_X64=53,
InstructionSet_POPCNT_X64=54,
InstructionSet_AVXVNNI_X64=55,
InstructionSet_MOVBE_X64=56,
InstructionSet_X86Serialize_X64=57,
InstructionSet_EVEX_X64=58,
InstructionSet_AVX512F_X64=59,
InstructionSet_AVX512F_VL_X64=60,
InstructionSet_AVX512BW_X64=61,
InstructionSet_AVX512BW_VL_X64=62,
InstructionSet_AVX512CD_X64=63,
InstructionSet_AVX512CD_VL_X64=64,
InstructionSet_AVX512DQ_X64=65,
InstructionSet_AVX512DQ_VL_X64=66,
InstructionSet_AVX512VBMI_X64=67,
InstructionSet_AVX512VBMI_VL_X64=68,
InstructionSet_AVX10v1_X64=69,
InstructionSet_AVX10v1_V512_X64=70,
InstructionSet_APX=39,
InstructionSet_X86Base_X64=40,
InstructionSet_SSE_X64=41,
InstructionSet_SSE2_X64=42,
InstructionSet_SSE3_X64=43,
InstructionSet_SSSE3_X64=44,
InstructionSet_SSE41_X64=45,
InstructionSet_SSE42_X64=46,
InstructionSet_AVX_X64=47,
InstructionSet_AVX2_X64=48,
InstructionSet_AES_X64=49,
InstructionSet_BMI1_X64=50,
InstructionSet_BMI2_X64=51,
InstructionSet_FMA_X64=52,
InstructionSet_LZCNT_X64=53,
InstructionSet_PCLMULQDQ_X64=54,
InstructionSet_POPCNT_X64=55,
InstructionSet_AVXVNNI_X64=56,
InstructionSet_MOVBE_X64=57,
InstructionSet_X86Serialize_X64=58,
InstructionSet_EVEX_X64=59,
InstructionSet_AVX512F_X64=60,
InstructionSet_AVX512F_VL_X64=61,
InstructionSet_AVX512BW_X64=62,
InstructionSet_AVX512BW_VL_X64=63,
InstructionSet_AVX512CD_X64=64,
InstructionSet_AVX512CD_VL_X64=65,
InstructionSet_AVX512DQ_X64=66,
InstructionSet_AVX512DQ_VL_X64=67,
InstructionSet_AVX512VBMI_X64=68,
InstructionSet_AVX512VBMI_VL_X64=69,
InstructionSet_AVX10v1_X64=70,
InstructionSet_AVX10v1_V512_X64=71,
InstructionSet_APX_X64=72,
#endif // TARGET_X86

};
Expand Down Expand Up @@ -364,6 +368,8 @@ struct CORINFO_InstructionSetFlags
AddInstructionSet(InstructionSet_AVX10v1_X64);
if (HasInstructionSet(InstructionSet_AVX10v1_V512))
AddInstructionSet(InstructionSet_AVX10v1_V512_X64);
if (HasInstructionSet(InstructionSet_APX))
AddInstructionSet(InstructionSet_APX_X64);
#endif // TARGET_AMD64
#ifdef TARGET_X86
#endif // TARGET_X86
Expand Down Expand Up @@ -572,6 +578,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512))
resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512_X64);
if (resultflags.HasInstructionSet(InstructionSet_APX) && !resultflags.HasInstructionSet(InstructionSet_APX_X64))
resultflags.RemoveInstructionSet(InstructionSet_APX);
if (resultflags.HasInstructionSet(InstructionSet_APX_X64) && !resultflags.HasInstructionSet(InstructionSet_APX))
resultflags.RemoveInstructionSet(InstructionSet_APX_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
Expand Down Expand Up @@ -990,6 +1000,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "VectorT256";
case InstructionSet_VectorT512 :
return "VectorT512";
case InstructionSet_APX :
return "APX";
case InstructionSet_APX_X64 :
return "APX_X64";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
Expand Down Expand Up @@ -1068,6 +1082,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "VectorT256";
case InstructionSet_VectorT512 :
return "VectorT512";
case InstructionSet_APX :
return "APX";
#endif // TARGET_X86

default:
Expand Down Expand Up @@ -1138,6 +1154,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand Down Expand Up @@ -1175,6 +1192,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX;
#endif // TARGET_X86

default:
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 7f7fd340-4779-455a-8046-628f3cd8c3c7 */
0x7f7fd340,
0x4779,
0x455a,
{0x80, 0x46, 0x62, 0x8f, 0x3c, 0xd8, 0xc3, 0xc7}
constexpr GUID JITEEVersionIdentifier = { /* c3886dcb-d533-44b8-86c0-ff79ac4ce9df */
0xc3886dcb,
0xd533,
0x44b8,
{0x86, 0xc0, 0xff, 0x79, 0xac, 0x4c, 0xe9, 0xdf}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Avx10v1=44,
READYTORUN_INSTRUCTION_Avx10v1_V512=46,
READYTORUN_INSTRUCTION_EVEX=47,
READYTORUN_INSTRUCTION_Apx=48,

};

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // VectorT128
{ NI_Illegal, NI_Illegal }, // VectorT256
{ NI_Illegal, NI_Illegal }, // VectorT512
{ NI_Illegal, NI_Illegal }, // APX
{ FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 },
{ FIRST_NI_SSE_X64, LAST_NI_SSE_X64 },
{ FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 },
Expand Down
10 changes: 7 additions & 3 deletions src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
#define REDHAWK_PALEXPORT extern "C"
#define REDHAWK_PALAPI __stdcall

// Fallback definition, used only when no previously included header already defines XSTATE_MASK_APX.
// 0x80000 is a single-bit mask with bit 19 set (1 << 19).
// NOTE(review): presumably needed for SDK headers that predate APX support - confirm.
#ifndef XSTATE_MASK_APX
#define XSTATE_MASK_APX (0x80000)
#endif // XSTATE_MASK_APX

// Index for the fiber local storage of the attached thread pointer
static uint32_t g_flsIndex = FLS_OUT_OF_INDEXES;

Expand Down Expand Up @@ -541,7 +545,7 @@ REDHAWK_PALEXPORT CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextB
#endif //TARGET_X86

#if defined(TARGET_X86) || defined(TARGET_AMD64)
const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512;
const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX;
const ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_MPX | xStateFeatureMask;
#elif defined(TARGET_ARM64)
const DWORD64 xStateFeatureMask = XSTATE_MASK_ARM64_SVE;
Expand Down Expand Up @@ -632,9 +636,9 @@ REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetCompleteThreadCont
// This should not normally fail.
// The system silently ignores any feature specified in the FeatureMask which is not enabled on the processor.
#if defined(TARGET_X86) || defined(TARGET_AMD64)
if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512))
if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX))
{
_ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512");
_ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX");
return FALSE;
}
#elif defined(TARGET_ARM64)
Expand Down
23 changes: 23 additions & 0 deletions src/coreclr/pal/inc/pal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1384,12 +1384,14 @@ typedef struct _KNONVOLATILE_CONTEXT_POINTERS {
#define XSTATE_AVX512_KMASK (5)
#define XSTATE_AVX512_ZMM_H (6)
#define XSTATE_AVX512_ZMM (7)
#define XSTATE_APX (19)

#define XSTATE_MASK_GSSE (UI64(1) << (XSTATE_GSSE))
#define XSTATE_MASK_AVX (XSTATE_MASK_GSSE)
#define XSTATE_MASK_AVX512 ((UI64(1) << (XSTATE_AVX512_KMASK)) | \
(UI64(1) << (XSTATE_AVX512_ZMM_H)) | \
(UI64(1) << (XSTATE_AVX512_ZMM)))
#define XSTATE_MASK_APX (UI64(1) << (XSTATE_APX))

typedef struct DECLSPEC_ALIGN(16) _M128A {
ULONGLONG Low;
Expand Down Expand Up @@ -1626,6 +1628,27 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
M512 Zmm30;
M512 Zmm31;
};

struct
{
DWORD64 Egpr16;
DWORD64 Egpr17;
DWORD64 Egpr18;
DWORD64 Egpr19;
DWORD64 Egpr20;
DWORD64 Egpr21;
DWORD64 Egpr22;
DWORD64 Egpr23;
DWORD64 Egpr24;
DWORD64 Egpr25;
DWORD64 Egpr26;
DWORD64 Egpr27;
DWORD64 Egpr28;
DWORD64 Egpr29;
DWORD64 Egpr30;
DWORD64 Egpr31;
};

} CONTEXT, *PCONTEXT, *LPCONTEXT;

//
Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/pal/src/arch/amd64/asmconstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
#define XSTATE_AVX512_KMASK (5)
#define XSTATE_AVX512_ZMM_H (6)
#define XSTATE_AVX512_ZMM (7)
#define XSTATE_APX (19)

#define XSTATE_MASK_GSSE (1 << (XSTATE_GSSE))
#define XSTATE_MASK_AVX (XSTATE_MASK_GSSE)
#define XSTATE_MASK_AVX512 ((1 << (XSTATE_AVX512_KMASK)) | \
(1 << (XSTATE_AVX512_ZMM_H)) | \
(1 << (XSTATE_AVX512_ZMM)))
#define XSTATE_MASK_APX (1 << (XSTATE_APX))

// The arch bit is normally set in the flag constants below. Since this is already arch-specific code and the arch bit is not
// relevant, the arch bit is excluded from the flag constants below for simpler tests.
Expand Down Expand Up @@ -91,7 +93,8 @@
#define CONTEXT_KMask0 CONTEXT_Ymm0H+(16*16)
#define CONTEXT_Zmm0H CONTEXT_KMask0+(8*8)
#define CONTEXT_Zmm16 CONTEXT_Zmm0H+(32*16)
#define CONTEXT_Size CONTEXT_Zmm16+(64*16)
// The extended GPRs (Egpr16..Egpr31) are laid out directly after Zmm16..Zmm31 in CONTEXT,
// so the Egpr area starts after the sixteen 64-byte ZMM registers.
#define CONTEXT_Egpr CONTEXT_Zmm16+(64*16)
// The Egpr area holds sixteen 8-byte registers and is the last member of CONTEXT.
#define CONTEXT_Size CONTEXT_Egpr+(16*8)

#else // HOST_64BIT

Expand Down
Loading
Loading