From 39024547e590b500e9478be06fae772630a0f74c Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Tue, 4 Oct 2022 08:48:08 -0700 Subject: [PATCH] Support Arm64 "constructed" constants in SuperPMI asm diffs SuperPMI asm diffs tries to ignore constants that can change between multiple replays, such as addresses that the replay engine must generate and not simply hand back from the collected data. Often, addresses have associated relocations generated during replay. SuperPMI can use these relocations to adjust the constants to allow two replays to match. However, there are cases on Arm64 where an address both doesn't report a relocation and is "constructed" using multiple `mov`/`movk` instructions. One case is the `allocPgoInstrumentationBySchema()` API which returns a pointer to a PGO data buffer. An address within this buffer is constructed via a sequence such as: ``` mov x0, #63408 movk x0, #23602, lsl #16 movk x0, #606, lsl #32 ``` When SuperPMI replays this API, it constructs a new buffer and returns that pointer, which is used to construct various actual addresses that are generated as "constructed" constants, shown above. This change "de-constructs" the constants and looks them up in the replay address map. If base and diff match the mapped constants, there is no asm diff. --- .../superpmi/superpmi-shared/spmiutil.cpp | 43 ++++++ .../tools/superpmi/superpmi-shared/spmiutil.h | 5 + .../tools/superpmi/superpmi/neardiffer.cpp | 128 +++++++++++++++--- 3 files changed, 160 insertions(+), 16 deletions(-) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp index 4fa386c8fed04..034e309634ebd 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp @@ -335,3 +335,46 @@ void PutThumb2BlRel24(UINT16* p, INT32 imm24) p[0] = Opcode0; p[1] = Opcode1; } + +// GetArm64MovConstant / GetArm64MovkConstant: Decode arm64 mov / movk instructions, e.g.: +// d29ff600 mov x0, #65456 +// f2ab8640 movk x0, #23602, lsl #16 +// f2c04bc0 movk x0, #606, lsl #32 +// +// This is used in the NearDiffer to determine if a sequence of mov/movk is actually an address. +// +// Return `true` if the instruction pointed to by `p` is a mov/movk, `false` otherwise. +// If true, fill out the target register in `*pReg`, constant in `*pCon`, and (for movk) shift value in `*pShift`. + +bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon) +{ + UINT32 instr = *p; + if ((instr & 0xffe00000) == 0xd2800000) + { + *pReg = instr & 0x1f; + *pCon = (instr >> 5) & 0xffff; + return true; + } + + return false; +} + +bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift) +{ + UINT32 instr = *p; + if ((instr & 0xff800000) == 0xf2800000) + { + *pReg = instr & 0x1f; + *pCon = (instr >> 5) & 0xffff; + *pShift = ((instr >> 21) & 0x3) * 16; + return true; + } + + return false; +} + +// PutArm64MovkConstant: set the constant field in an Arm64 `movk` instruction +void PutArm64MovkConstant(UINT32* p, unsigned con) +{ + *p = (*p & ~(0xffff << 5)) | ((con & 0xffff) << 5); +} diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h index aa5605576625c..f414e9be3623c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h @@ -82,6 +82,11 @@ void PutArm64Rel12(UINT32* pCode, INT32 imm12); void PutThumb2Mov32(UINT16* p, UINT32 imm32); void PutThumb2BlRel24(UINT16* p, INT32 imm24); +bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon); +bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift); + +void PutArm64MovkConstant(UINT32* p, unsigned con); + template inline constexpr unsigned ArrLen(T (&)[size]) { diff --git a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp index 2d480ee776dc5..8287970d7bebb 100644 --- a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp +++ b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp @@ -298,22 +298,24 @@ struct DiffData CompileResult* cr2; // Details of the first block - size_t blocksize1; - size_t datablock1; - size_t datablockSize1; - size_t originalBlock1; - size_t originalDataBlock1; - size_t otherCodeBlock1; - size_t otherCodeBlockSize1; + unsigned char* block1; + size_t blocksize1; + unsigned char* datablock1; + size_t datablockSize1; + size_t originalBlock1; + size_t originalDataBlock1; + size_t otherCodeBlock1; + size_t otherCodeBlockSize1; // Details of the second block - size_t blocksize2; - size_t datablock2; - size_t datablockSize2; - size_t originalBlock2; - size_t originalDataBlock2; - size_t otherCodeBlock2; - size_t otherCodeBlockSize2; + unsigned char* block2; + size_t blocksize2; + unsigned char* datablock2; + size_t datablockSize2; + size_t originalBlock2; + size_t originalDataBlock2; + size_t otherCodeBlock2; + size_t otherCodeBlockSize2; }; // @@ -330,6 +332,7 @@ bool NearDiffer::compareOffsets( return true; } + const SPMI_TARGET_ARCHITECTURE targetArch = GetSpmiTargetArchitecture(); const DiffData* data = (const DiffData*)payload; size_t ip1 = data->originalBlock1 + blockOffset; size_t ip2 = data->originalBlock2 + blockOffset; @@ -435,6 +438,99 @@ bool NearDiffer::compareOffsets( if ((mapped1 == mapped2) && (mapped1 != (size_t)-1)) return true; + // There are some cases on arm64 where we generate multiple instruction register construction of addresses + // but we don't have a relocation for them (so they aren't handled by `applyRelocs`). One case is + // allocPgoInstrumentationBySchema(), which returns an address into which the JIT writes PGO probe data. + // The instruction sequence is something like this: + // mov x0, #63408 + // movk x0, #23602, lsl #16 + // movk x0, #606, lsl #32 + // + // Here, we try to match this sequence and look it up in the address map. + // + // Some version of this logic might apply to ARM as well. + // + if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64) + { + bool movk2 = false, movk3 = false; + unsigned reg1_1, reg1_2, reg2_1, reg2_2, reg3_1, reg3_2, reg4_1, reg4_2; + unsigned con1_1, con1_2, con2_1, con2_2, con3_1, con3_2, con4_1, con4_2; + unsigned shift2_1, shift2_2, shift3_1, shift3_2, shift4_1, shift4_2; + UINT32* iaddr1 = (UINT32*)(data->block1 + blockOffset); + UINT32* iaddr2 = (UINT32*)(data->block2 + blockOffset); + UINT32* iaddr1end = (UINT32*)(data->block1 + data->blocksize1); + UINT32* iaddr2end = (UINT32*)(data->block2 + data->blocksize2); + + // We're assuming that a mov/movk isn't the last instruction in the instruction buffer. + if ((iaddr1 < iaddr1end) && + (iaddr2 < iaddr2end) && + GetArm64MovConstant(iaddr1, ®1_1, &con1_1) && + GetArm64MovConstant(iaddr2, ®1_2, &con1_2) && + (reg1_1 == reg1_2)) + { + if ((iaddr1 + 1 < iaddr1end) && + (iaddr2 + 1 < iaddr2end) && + GetArm64MovkConstant(iaddr1 + 1, ®2_1, &con2_1, &shift2_1) && + GetArm64MovkConstant(iaddr2 + 1, ®2_2, &con2_2, &shift2_2) && + (reg2_1 == reg2_2) && + (shift2_1 == shift2_2) && + (shift2_1 == 16)) + { + // We currently assume the address requires at least 2 'movk' instructions, thus, is >4GB. + // The 3rd 'movk' is optional. + if ((iaddr1 + 2 < iaddr1end) && + (iaddr2 + 2 < iaddr2end) && + GetArm64MovkConstant(iaddr1 + 2, ®3_1, &con3_1, &shift3_1) && + GetArm64MovkConstant(iaddr2 + 2, ®3_2, &con3_2, &shift3_2) && + (reg3_1 == reg3_2) && + (shift3_1 == shift3_2) && + (shift3_1 == 32)) + { + movk2 = true; + // Note: this only works if size_t is 64-bit. + size_t addr1 = (size_t)con1_1 + ((size_t)con2_1 << 16) + ((size_t)con3_1 << 32); + size_t addr2 = (size_t)con1_2 + ((size_t)con2_2 << 16) + ((size_t)con3_2 << 32); + if ((iaddr1 + 3 < iaddr1end) && + (iaddr2 + 3 < iaddr2end) && + GetArm64MovkConstant(iaddr1 + 3, ®4_1, &con4_1, &shift4_1) && + GetArm64MovkConstant(iaddr2 + 3, ®4_2, &con4_2, &shift4_2) && + (reg4_1 == reg4_2) && + (shift4_1 == shift4_2) && + (shift4_1 == 48)) + { + movk3 = true; + addr1 += (size_t)con4_1 << 48; + addr2 += (size_t)con4_2 << 48; + } + + // Check the constants! We don't need to check 'addr1 == addr2' because if that were + // true we wouldn't have gotten here. + + size_t mapped1 = (size_t)data->cr1->searchAddressMap((void*)addr1); + size_t mapped2 = (size_t)data->cr2->searchAddressMap((void*)addr2); + if ((mapped1 == mapped2) && (mapped1 != (size_t)-1)) + { + // Now, zero out the constants in the `movk` instructions so when the disassembler + // gets to them, they compare equal. + PutArm64MovkConstant(iaddr1 + 1, 0); + PutArm64MovkConstant(iaddr2 + 1, 0); + if (movk2) + { + PutArm64MovkConstant(iaddr1 + 2, 0); + PutArm64MovkConstant(iaddr2 + 2, 0); + } + if (movk3) + { + PutArm64MovkConstant(iaddr1 + 3, 0); + PutArm64MovkConstant(iaddr2 + 3, 0); + } + return true; + } + } + } + } + } + return false; } @@ -513,11 +609,11 @@ bool NearDiffer::compareCodeSection(MethodContext* mc, cr2, // Details of the first block - (size_t)blocksize1, (size_t)datablock1, (size_t)datablockSize1, (size_t)originalBlock1, + block1, (size_t)blocksize1, datablock1, (size_t)datablockSize1, (size_t)originalBlock1, (size_t)originalDataBlock1, (size_t)otherCodeBlock1, (size_t)otherCodeBlockSize1, // Details of the second block - (size_t)blocksize2, (size_t)datablock2, (size_t)datablockSize2, (size_t)originalBlock2, + block2, (size_t)blocksize2, datablock2, (size_t)datablockSize2, (size_t)originalBlock2, (size_t)originalDataBlock2, (size_t)otherCodeBlock2, (size_t)otherCodeBlockSize2}; #ifdef USE_COREDISTOOLS