Skip to content

Commit

Permalink
Support Arm64 "constructed" constants in SuperPMI asm diffs
Browse files Browse the repository at this point in the history
SuperPMI asm diffs tries to ignore constants that can change between
multiple replays, such as addresses that the replay engine must generate
and not simply hand back from the collected data.

Often, addresses have associated relocations generated during replay.
SuperPMI can use these relocations to adjust the constants to allow
two replays to match. However, there are cases on Arm64 where an address
both doesn't report a relocation and is "constructed" using multiple
`mov`/`movk` instructions.

One case is the `allocPgoInstrumentationBySchema()`
API which returns a pointer to a PGO data buffer. An address within this
buffer is constructed via a sequence such as:
```
mov     x0, dotnet#63408
movk    x0, dotnet#23602, lsl dotnet#16
movk    x0, dotnet#606, lsl dotnet#32
```

When SuperPMI replays this API, it constructs a new buffer and returns that
pointer, which is used to construct various actual addresses that are
generated as "constructed" constants, shown above.

This change "de-constructs" the constants and looks them up in the replay
address map. If base and diff match the mapped constants, there is no asm diff.
  • Loading branch information
BruceForstall committed Oct 4, 2022
1 parent fb5f07f commit 3902454
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 16 deletions.
43 changes: 43 additions & 0 deletions src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,3 +335,46 @@ void PutThumb2BlRel24(UINT16* p, INT32 imm24)
p[0] = Opcode0;
p[1] = Opcode1;
}

// GetArm64MovConstant / GetArm64MovkConstant: Decode arm64 mov / movk instructions, e.g.:
// d29ff600 mov x0, #65456
// f2ab8640 movk x0, #23602, lsl #16
// f2c04bc0 movk x0, #606, lsl #32
//
// This is used in the NearDiffer to determine if a sequence of mov/movk is actually an address.
//
// Return `true` if the instruction pointed to by `p` is a mov/movk, `false` otherwise.
// If true, fill out the target register in `*pReg`, constant in `*pCon`, and (for movk) shift value in `*pShift`.

bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon)
{
UINT32 instr = *p;
if ((instr & 0xffe00000) == 0xd2800000)
{
*pReg = instr & 0x1f;
*pCon = (instr >> 5) & 0xffff;
return true;
}

return false;
}

bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift)
{
UINT32 instr = *p;
if ((instr & 0xff800000) == 0xf2800000)
{
*pReg = instr & 0x1f;
*pCon = (instr >> 5) & 0xffff;
*pShift = ((instr >> 21) & 0x3) * 16;
return true;
}

return false;
}

// PutArm64MovkConstant: set the constant field in an Arm64 `movk` instruction
void PutArm64MovkConstant(UINT32* p, unsigned con)
{
*p = (*p & ~(0xffff << 5)) | ((con & 0xffff) << 5);
}
5 changes: 5 additions & 0 deletions src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ void PutArm64Rel12(UINT32* pCode, INT32 imm12);
void PutThumb2Mov32(UINT16* p, UINT32 imm32);
void PutThumb2BlRel24(UINT16* p, INT32 imm24);

bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon);
bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift);

void PutArm64MovkConstant(UINT32* p, unsigned con);

template <typename T, int size>
inline constexpr unsigned ArrLen(T (&)[size])
{
Expand Down
128 changes: 112 additions & 16 deletions src/coreclr/tools/superpmi/superpmi/neardiffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,22 +298,24 @@ struct DiffData
CompileResult* cr2;

// Details of the first block
size_t blocksize1;
size_t datablock1;
size_t datablockSize1;
size_t originalBlock1;
size_t originalDataBlock1;
size_t otherCodeBlock1;
size_t otherCodeBlockSize1;
unsigned char* block1;
size_t blocksize1;
unsigned char* datablock1;
size_t datablockSize1;
size_t originalBlock1;
size_t originalDataBlock1;
size_t otherCodeBlock1;
size_t otherCodeBlockSize1;

// Details of the second block
size_t blocksize2;
size_t datablock2;
size_t datablockSize2;
size_t originalBlock2;
size_t originalDataBlock2;
size_t otherCodeBlock2;
size_t otherCodeBlockSize2;
unsigned char* block2;
size_t blocksize2;
unsigned char* datablock2;
size_t datablockSize2;
size_t originalBlock2;
size_t originalDataBlock2;
size_t otherCodeBlock2;
size_t otherCodeBlockSize2;
};

//
Expand All @@ -330,6 +332,7 @@ bool NearDiffer::compareOffsets(
return true;
}

const SPMI_TARGET_ARCHITECTURE targetArch = GetSpmiTargetArchitecture();
const DiffData* data = (const DiffData*)payload;
size_t ip1 = data->originalBlock1 + blockOffset;
size_t ip2 = data->originalBlock2 + blockOffset;
Expand Down Expand Up @@ -435,6 +438,99 @@ bool NearDiffer::compareOffsets(
if ((mapped1 == mapped2) && (mapped1 != (size_t)-1))
return true;

// There are some cases on arm64 where we generate multiple instruction register construction of addresses
// but we don't have a relocation for them (so they aren't handled by `applyRelocs`). One case is
// allocPgoInstrumentationBySchema(), which returns an address into which the JIT writes PGO probe data.
// The instruction sequence is something like this:
// mov x0, #63408
// movk x0, #23602, lsl #16
// movk x0, #606, lsl #32
//
// Here, we try to match this sequence and look it up in the address map.
//
// Some version of this logic might apply to ARM as well.
//
if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64)
{
bool movk2 = false, movk3 = false;
unsigned reg1_1, reg1_2, reg2_1, reg2_2, reg3_1, reg3_2, reg4_1, reg4_2;
unsigned con1_1, con1_2, con2_1, con2_2, con3_1, con3_2, con4_1, con4_2;
unsigned shift2_1, shift2_2, shift3_1, shift3_2, shift4_1, shift4_2;
UINT32* iaddr1 = (UINT32*)(data->block1 + blockOffset);
UINT32* iaddr2 = (UINT32*)(data->block2 + blockOffset);
UINT32* iaddr1end = (UINT32*)(data->block1 + data->blocksize1);
UINT32* iaddr2end = (UINT32*)(data->block2 + data->blocksize2);

// We're assuming that a mov/movk isn't the last instruction in the instruction buffer.
if ((iaddr1 < iaddr1end) &&
(iaddr2 < iaddr2end) &&
GetArm64MovConstant(iaddr1, &reg1_1, &con1_1) &&
GetArm64MovConstant(iaddr2, &reg1_2, &con1_2) &&
(reg1_1 == reg1_2))
{
if ((iaddr1 + 1 < iaddr1end) &&
(iaddr2 + 1 < iaddr2end) &&
GetArm64MovkConstant(iaddr1 + 1, &reg2_1, &con2_1, &shift2_1) &&
GetArm64MovkConstant(iaddr2 + 1, &reg2_2, &con2_2, &shift2_2) &&
(reg2_1 == reg2_2) &&
(shift2_1 == shift2_2) &&
(shift2_1 == 16))
{
// We currently assume the address requires at least 2 'movk' instructions, thus, is >4GB.
// The 3rd 'movk' is optional.
if ((iaddr1 + 2 < iaddr1end) &&
(iaddr2 + 2 < iaddr2end) &&
GetArm64MovkConstant(iaddr1 + 2, &reg3_1, &con3_1, &shift3_1) &&
GetArm64MovkConstant(iaddr2 + 2, &reg3_2, &con3_2, &shift3_2) &&
(reg3_1 == reg3_2) &&
(shift3_1 == shift3_2) &&
(shift3_1 == 32))
{
movk2 = true;
// Note: this only works if size_t is 64-bit.
size_t addr1 = (size_t)con1_1 + ((size_t)con2_1 << 16) + ((size_t)con3_1 << 32);
size_t addr2 = (size_t)con1_2 + ((size_t)con2_2 << 16) + ((size_t)con3_2 << 32);
if ((iaddr1 + 3 < iaddr1end) &&
(iaddr2 + 3 < iaddr2end) &&
GetArm64MovkConstant(iaddr1 + 3, &reg4_1, &con4_1, &shift4_1) &&
GetArm64MovkConstant(iaddr2 + 3, &reg4_2, &con4_2, &shift4_2) &&
(reg4_1 == reg4_2) &&
(shift4_1 == shift4_2) &&
(shift4_1 == 48))
{
movk3 = true;
addr1 += (size_t)con4_1 << 48;
addr2 += (size_t)con4_2 << 48;
}

// Check the constants! We don't need to check 'addr1 == addr2' because if that were
// true we wouldn't have gotten here.

size_t mapped1 = (size_t)data->cr1->searchAddressMap((void*)addr1);
size_t mapped2 = (size_t)data->cr2->searchAddressMap((void*)addr2);
if ((mapped1 == mapped2) && (mapped1 != (size_t)-1))
{
// Now, zero out the constants in the `movk` instructions so when the disassembler
// gets to them, they compare equal.
PutArm64MovkConstant(iaddr1 + 1, 0);
PutArm64MovkConstant(iaddr2 + 1, 0);
if (movk2)
{
PutArm64MovkConstant(iaddr1 + 2, 0);
PutArm64MovkConstant(iaddr2 + 2, 0);
}
if (movk3)
{
PutArm64MovkConstant(iaddr1 + 3, 0);
PutArm64MovkConstant(iaddr2 + 3, 0);
}
return true;
}
}
}
}
}

return false;
}

Expand Down Expand Up @@ -513,11 +609,11 @@ bool NearDiffer::compareCodeSection(MethodContext* mc,
cr2,

// Details of the first block
(size_t)blocksize1, (size_t)datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
block1, (size_t)blocksize1, datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
(size_t)originalDataBlock1, (size_t)otherCodeBlock1, (size_t)otherCodeBlockSize1,

// Details of the second block
(size_t)blocksize2, (size_t)datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
block2, (size_t)blocksize2, datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
(size_t)originalDataBlock2, (size_t)otherCodeBlock2, (size_t)otherCodeBlockSize2};

#ifdef USE_COREDISTOOLS
Expand Down

0 comments on commit 3902454

Please sign in to comment.