Skip to content

Commit

Permalink
JIT: Handle TYP_DOUBLE on arm softFP similarly to TYP_LONG (#103869)
Browse files Browse the repository at this point in the history
Avoids `PUTARG_REG` being a multi-reg node on arm32 to handle this special case.

The gist of the change: we expect multi-reg structs, including `TYP_LONG` on
arm32/x86, to be put into a `FIELD_LIST(PUTARG_REG(low), PUTARG_REG(high))` form
before LSRA. For multi-reg structs this happens during morph, while for
`TYP_LONG` we keep them as primitives until lowering, so it happens during
lowering.

There was a special case for arm32 with softFP where `TYP_DOUBLE` also ends up
being a similar multi-reg struct that is passed in two integer registers. There
was a bunch of special casing to handle this in the backend, by making
`PUTARG_REG` and `BITCAST` multireg nodes on arm32 only.

This PR instead puts `TYP_DOUBLE` into the expected multireg-struct form during
lowering, allowing it to be handled like any other multireg struct.
  • Loading branch information
jakobbotsch committed Jun 25, 2024
1 parent 3bab584 commit d0c8f83
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 120 deletions.
17 changes: 0 additions & 17 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1200,23 +1200,6 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode)
{
var_types type = treeNode->GetRegType(regIndex);
regNumber argReg = treeNode->GetRegNumByIdx(regIndex);
#ifdef TARGET_ARM
if (type == TYP_LONG)
{
// We should only see long fields for DOUBLEs passed in 2 integer registers, via bitcast.
// All other LONGs should have been decomposed.
// Handle the first INT, and then handle the 2nd below.
assert(nextArgNode->OperIs(GT_BITCAST));
type = TYP_INT;
inst_Mov(type, argReg, fieldReg, /* canSkip */ true);

// Now set up the next register for the 2nd INT
argReg = REG_NEXT(argReg);
regIndex++;
assert(argReg == treeNode->GetRegNumByIdx(regIndex));
fieldReg = nextArgNode->AsMultiRegOp()->GetRegNumByIdx(1);
}
#endif // TARGET_ARM

// If child node is not already in the register we need, move it
inst_Mov(type, argReg, fieldReg, /* canSkip */ true);
Expand Down
18 changes: 1 addition & 17 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8135,23 +8135,7 @@ void CodeGen::genCodeForBitCast(GenTreeOp* treeNode)
}
else
{
#ifdef TARGET_ARM
if (compiler->opts.compUseSoftFP && (targetType == TYP_LONG))
{
// This is a special arm-softFP case when a TYP_LONG node was introduced during lowering
// for a call argument, so it was not handled by decomposelongs phase as all other TYP_LONG nodes.
// Example foo(double LclVar V01), LclVar V01 has to be passed in general registers r0, r1,
// so lowering will add `BITCAST long(LclVar double V01)` and codegen has to support it here.
const regNumber srcReg = op1->GetRegNum();
const regNumber otherReg = treeNode->AsMultiRegOp()->gtOtherReg;
assert(otherReg != REG_NA);
inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, srcReg, EA_8BYTE);
}
else
#endif // TARGET_ARM
{
genBitCast(targetType, targetReg, op1->TypeGet(), op1->GetRegNum());
}
genBitCast(targetType, targetReg, op1->TypeGet(), op1->GetRegNum());
}
genProduceReg(treeNode);
}
Expand Down
22 changes: 1 addition & 21 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -888,15 +888,8 @@ int GenTree::GetRegisterDstCount(Compiler* compiler) const
#if !defined(TARGET_64BIT)
else if (OperIsMultiRegOp())
{
// A MultiRegOp is a GT_MUL_LONG, GT_PUTARG_REG, or GT_BITCAST.
// For the latter two (ARM-only), they only have multiple registers if they produce a long value
// (GT_MUL_LONG always produces a long value).
#ifdef TARGET_ARM
return (TypeGet() == TYP_LONG) ? 2 : 1;
#else
assert(OperIs(GT_MUL_LONG));
return 2;
#endif
}
#endif
#ifdef FEATURE_HW_INTRINSICS
Expand Down Expand Up @@ -8968,24 +8961,11 @@ void GenTreeOp::CheckDivideByConstOptimized(Compiler* comp)
// Return Value:
// Returns the newly created PutArgReg node.
//
// Notes:
// The node is generated as GenTreeMultiRegOp on RyuJIT/armel, GenTreeOp on all the other archs.
//
GenTree* Compiler::gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg)
{
assert(arg != nullptr);

GenTree* node = nullptr;
#if defined(TARGET_ARM)
// A PUTARG_REG could be a MultiRegOp on arm since we could move a double register to two int registers.
node = new (this, GT_PUTARG_REG) GenTreeMultiRegOp(GT_PUTARG_REG, type, arg, nullptr);
if (type == TYP_LONG)
{
node->AsMultiRegOp()->gtOtherReg = REG_NEXT(argReg);
}
#else
node = gtNewOperNode(GT_PUTARG_REG, type, arg);
#endif
GenTree* node = gtNewOperNode(GT_PUTARG_REG, type, arg);
node->SetRegNum(argReg);

return node;
Expand Down
9 changes: 0 additions & 9 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1253,12 +1253,6 @@ struct GenTree
{
return true;
}
#if defined(TARGET_ARM)
if (OperIs(GT_PUTARG_REG, GT_BITCAST))
{
return true;
}
#endif // TARGET_ARM
#endif // TARGET_64BIT
return false;
}
Expand Down Expand Up @@ -1725,9 +1719,6 @@ struct GenTree
{
case GT_INTRINSIC:
case GT_LEA:
#if defined(TARGET_ARM)
case GT_PUTARG_REG:
#endif // defined(TARGET_ARM)
#if defined(TARGET_ARM64)
case GT_SELECT_NEGCC:
case GT_SELECT_INCCC:
Expand Down
8 changes: 0 additions & 8 deletions src/coreclr/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,7 @@ GTNODE(INTRINSIC , GenTreeIntrinsic ,0,0,GTK_BINOP|GTK_EXOP)
GTNODE(KEEPALIVE , GenTree ,0,0,GTK_UNOP|GTK_NOVALUE) // keep operand alive, generate no code, produce no result

GTNODE(CAST , GenTreeCast ,0,0,GTK_UNOP|GTK_EXOP) // conversion to another type
#if defined(TARGET_ARM)
GTNODE(BITCAST , GenTreeMultiRegOp ,0,1,GTK_UNOP) // reinterpretation of bits as another type
#else
GTNODE(BITCAST , GenTreeOp ,0,1,GTK_UNOP) // reinterpretation of bits as another type
#endif
GTNODE(CKFINITE , GenTreeOp ,0,1,GTK_UNOP|DBK_NOCONTAIN) // Check for NaN
GTNODE(LCLHEAP , GenTreeOp ,0,1,GTK_UNOP|DBK_NOCONTAIN) // alloca()

Expand Down Expand Up @@ -310,11 +306,7 @@ GTNODE(EMITNOP , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI
GTNODE(PINVOKE_PROLOG , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // pinvoke prolog seq
GTNODE(PINVOKE_EPILOG , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // pinvoke epilog seq
GTNODE(RETURNTRAP , GenTreeOp ,0,0,GTK_UNOP|GTK_NOVALUE|DBK_NOTHIR) // a conditional call to wait on gc
#if defined(TARGET_ARM)
GTNODE(PUTARG_REG , GenTreeMultiRegOp ,0,0,GTK_UNOP|DBK_NOTHIR) // operator that places outgoing arg in register
#else
GTNODE(PUTARG_REG , GenTreeOp ,0,0,GTK_UNOP|DBK_NOTHIR) // operator that places outgoing arg in register
#endif
GTNODE(PUTARG_STK , GenTreePutArgStk ,0,0,GTK_UNOP|GTK_NOVALUE|DBK_NOTHIR) // operator that places outgoing arg in stack
#if FEATURE_ARG_SPLIT
GTNODE(PUTARG_SPLIT , GenTreePutArgSplit ,0,0,GTK_UNOP|DBK_NOTHIR) // operator that places outgoing arg in registers with stack (split struct in ARM32)
Expand Down
4 changes: 1 addition & 3 deletions src/coreclr/jit/gtstructs.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,8 @@ GTSTRUCT_N(OpCC , GT_SELECTCC, GT_SELECT_INCCC, GT_JCMP, GT_JTEST, GT_SEL
#else
GTSTRUCT_3(OpCC , GT_SELECTCC, GT_JCMP, GT_JTEST)
#endif
#if defined(TARGET_X86)
#if !defined(TARGET_64BIT)
GTSTRUCT_1(MultiRegOp , GT_MUL_LONG)
#elif defined (TARGET_ARM)
GTSTRUCT_3(MultiRegOp , GT_MUL_LONG, GT_PUTARG_REG, GT_BITCAST)
#endif
/*****************************************************************************/
#undef GTSTRUCT_0
Expand Down
39 changes: 24 additions & 15 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1789,6 +1789,28 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late)
assert(!arg->OperIsPutArg());

#if !defined(TARGET_64BIT)
if (comp->opts.compUseSoftFP && (type == TYP_DOUBLE))
{
// Unlike TYP_LONG we do no decomposition for doubles, yet we maintain
// it as a primitive type until lowering. So we need to get it into the
// right form here.

unsigned argLclNum = comp->lvaGrabTemp(false DEBUGARG("double arg on softFP"));
GenTree* store = comp->gtNewTempStore(argLclNum, arg);
GenTree* low = comp->gtNewLclFldNode(argLclNum, TYP_INT, 0);
GenTree* high = comp->gtNewLclFldNode(argLclNum, TYP_INT, 4);
GenTree* longNode = new (comp, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, low, high);
BlockRange().InsertAfter(arg, store, low, high, longNode);

*ppArg = arg = longNode;
type = TYP_LONG;

comp->lvaSetVarDoNotEnregister(argLclNum DEBUGARG(DoNotEnregisterReason::LocalField));

JITDUMP("Created new nodes for double-typed arg on softFP:\n");
DISPRANGE(LIR::ReadOnlyRange(store, longNode));
}

if (varTypeIsLong(type))
{
noway_assert(arg->OperIs(GT_LONG));
Expand Down Expand Up @@ -1826,9 +1848,8 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late)
#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
if (call->IsVarargs() || comp->opts.compUseSoftFP || callArg->AbiInfo.IsMismatchedArgType())
{
// For vararg call or on armel, reg args should be all integer.
// For arg type and arg reg mismatch, reg arg should be integer on riscv64
// Insert copies as needed to move float value to integer register.
// Insert copies as needed to move float value to integer register
// if the ABI requires it.
GenTree* newNode = LowerFloatArg(ppArg, callArg);
if (newNode != nullptr)
{
Expand Down Expand Up @@ -1942,18 +1963,6 @@ GenTree* Lowering::LowerFloatArgReg(GenTree* arg, regNumber regNum)
GenTree* intArg = comp->gtNewBitCastNode(intType, arg);
intArg->SetRegNum(regNum);

#ifdef TARGET_ARM
if (floatType == TYP_DOUBLE)
{
// A special case when we introduce TYP_LONG
// during lowering for arm32 softFP to pass double
// in int registers.
assert(comp->opts.compUseSoftFP);

regNumber nextReg = REG_NEXT(regNum);
intArg->AsMultiRegOp()->gtOtherReg = nextReg;
}
#endif
return intArg;
}
#endif
Expand Down
10 changes: 0 additions & 10 deletions src/coreclr/jit/lsraarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,6 @@ int LinearScan::BuildNode(GenTree* tree)

case GT_PUTARG_REG:
srcCount = BuildPutArgReg(tree->AsUnOp());
dstCount = tree->AsMultiRegOp()->GetRegCount();
break;

case GT_BITCAST:
Expand All @@ -673,15 +672,6 @@ int LinearScan::BuildNode(GenTree* tree)
argMask = genSingleTypeRegMask(argReg);
}

// If type of node is `long` then it is actually `double`.
// The actual `long` types must have been transformed as a field list with two fields.
if (tree->TypeGet() == TYP_LONG)
{
dstCount++;
assert(genRegArgNext(argReg) == REG_NEXT(argReg));
argMask |= genSingleTypeRegMask(REG_NEXT(argReg));
dstCount = 2;
}
if (!tree->gtGetOp1()->isContained())
{
BuildUse(tree->gtGetOp1());
Expand Down
24 changes: 4 additions & 20 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4430,27 +4430,11 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node)
numPlacedArgLocals++;
}

#ifdef TARGET_ARM
// If type of node is `long` then it is actually `double`.
// The actual `long` types must have been transformed as a field list with two fields.
if (node->TypeGet() == TYP_LONG)
{
srcCount++;
SingleTypeRegSet argMaskHi = genSingleTypeRegMask(REG_NEXT(argReg));
assert(genRegArgNext(argReg) == REG_NEXT(argReg));
use = BuildUse(op1, argMaskHi, 1);
BuildDef(node, argMask, 0);
BuildDef(node, argMaskHi, 1);
}
else
#endif // TARGET_ARM
RefPosition* def = BuildDef(node, argMask);
if (isSpecialPutArg)
{
RefPosition* def = BuildDef(node, argMask);
if (isSpecialPutArg)
{
def->getInterval()->isSpecialPutArg = true;
def->getInterval()->assignRelatedInterval(use->getInterval());
}
def->getInterval()->isSpecialPutArg = true;
def->getInterval()->assignRelatedInterval(use->getInterval());
}

return srcCount;
Expand Down

0 comments on commit d0c8f83

Please sign in to comment.