Skip to content

Commit

Permalink
Do not create ADDR(HWI/SIMD) in the inliner (#74276)
Browse files Browse the repository at this point in the history
* Simplify some inlining code

* Fix up the tail call hackaround

Integer zero works better with ASG parents (they turn into init blks).

* Add a test for the implicit byref bug

* Fix the implicit byref bug
  • Loading branch information
SingleAccretion committed Sep 7, 2022
1 parent 8d03644 commit ebd80ad
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 164 deletions.
9 changes: 0 additions & 9 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5890,15 +5890,6 @@ class Compiler
Statement* fgInlinePrependStatements(InlineInfo* inlineInfo);
void fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, Statement* stmt);

#if FEATURE_MULTIREG_RET
GenTree* fgGetStructAsStructPtr(GenTree* tree);
GenTree* fgAssignStructInlineeToVar(GenTree* child, CORINFO_CLASS_HANDLE retClsHnd);
void fgAttachStructInlineeToAsg(GenTree* tree, GenTree* child, CORINFO_CLASS_HANDLE retClsHnd);
#endif // FEATURE_MULTIREG_RET

static fgWalkPreFn fgUpdateInlineReturnExpressionPlaceHolder;
static fgWalkPostFn fgLateDevirtualization;

#ifdef DEBUG
static fgWalkPreFn fgDebugCheckInlineCandidates;

Expand Down
201 changes: 78 additions & 123 deletions src/coreclr/jit/fginline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi

// If an inline was rejected and the call returns a struct, we may
// have deferred some work when importing call for cases where the
// struct is returned in register(s).
// struct is returned in multiple registers.
//
// See the bail-out clauses in impFixupCallStructReturn for inline
// candidates.
Expand All @@ -265,21 +265,18 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi

switch (howToReturnStruct)
{

#if FEATURE_MULTIREG_RET

// Is this a type that is returned in multiple registers
// or a via a primitive type that is larger than the struct type?
// if so we need to force into into a form we accept.
// i.e. LclVar = call()
// Force multi-reg nodes into the "lcl = node()" form if necessary.
//
case Compiler::SPK_ByValue:
case Compiler::SPK_ByValueAsHfa:
{
// See assert below, we only look one level above for an asg parent.
if (parent->gtOper == GT_ASG)
if (parent->OperIs(GT_ASG))
{
// Either lhs is a call V05 = call(); or lhs is addr, and asg becomes a copyBlk.
AttachStructInlineeToAsg(parent, tree, retClsHnd);
// The inlinee can only be the RHS.
assert(parent->gtGetOp2() == tree);
AttachStructInlineeToAsg(parent->AsOp(), retClsHnd);
}
else
{
Expand Down Expand Up @@ -310,141 +307,99 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
}

#if FEATURE_MULTIREG_RET

/***************************************************************************************************
* child - The inlinee of the retExpr node.
* retClsHnd - The struct class handle of the type of the inlinee.
*
* Assign the inlinee to a tmp, if it is a call, just assign it to a lclVar, else we can
* use a copyblock to do the assignment.
*/
GenTree* AssignStructInlineeToVar(GenTree* child, CORINFO_CLASS_HANDLE retClsHnd)
//------------------------------------------------------------------------
// AttachStructInlineeToAsg: Update an "ASG(..., inlinee)" tree.
//
// Morphs inlinees that are multi-reg nodes into the (only) supported shape
// of "lcl = node()", either by marking the LHS local "lvIsMultiRegRet" or
// assigning the node into a temp and using that as the RHS.
//
// Arguments:
// asg - The assignment with the inlinee on the RHS
// retClsHnd - The struct handle for the inlinee
//
void AttachStructInlineeToAsg(GenTreeOp* asg, CORINFO_CLASS_HANDLE retClsHnd)
{
assert(child->gtOper != GT_RET_EXPR && child->gtOper != GT_MKREFANY);
assert(asg->OperIs(GT_ASG));

unsigned tmpNum = m_compiler->lvaGrabTemp(false DEBUGARG("RetBuf for struct inline return candidates."));
m_compiler->lvaSetStruct(tmpNum, retClsHnd, false);
var_types structType = m_compiler->lvaGetDesc(tmpNum)->lvType;
GenTree* dst = asg->gtGetOp1();
GenTree* inlinee = asg->gtGetOp2();

GenTree* dst = m_compiler->gtNewLclvNode(tmpNum, structType);

// If we have a call, we'd like it to be: V00 = call(), but first check if
// we have a ", , , call()" -- this is very defensive as we may never get
// an inlinee that is made of commas. If the inlinee is not a call, then
// we use a copy block to do the assignment.
GenTree* src = child;
GenTree* lastComma = nullptr;
while (src->gtOper == GT_COMMA)
// We need to force all assignments from multi-reg nodes into the "lcl = node()" form.
if (inlinee->IsMultiRegNode())
{
lastComma = src;
src = src->AsOp()->gtOp2;
}

GenTree* newInlinee = nullptr;
if (src->gtOper == GT_CALL)
{
// If inlinee was just a call, new inlinee is v05 = call()
newInlinee = m_compiler->gtNewAssignNode(dst, src);

// When returning a multi-register value in a local var, make sure the variable is
// marked as lvIsMultiRegRet, so it does not get promoted.
if (src->AsCall()->HasMultiRegRetVal())
// Special case: we already have a local, the only thing to do is mark it appropriately. Except
// if it may turn into an indirection.
if (dst->OperIs(GT_LCL_VAR) && !m_compiler->lvaIsImplicitByRefLocal(dst->AsLclVar()->GetLclNum()))
{
m_compiler->lvaGetDesc(tmpNum)->lvIsMultiRegRet = true;
m_compiler->lvaGetDesc(dst->AsLclVar())->lvIsMultiRegRet = true;
}

// If inlinee was comma, but a deeper call, new inlinee is (, , , v05 = call())
if (child->gtOper == GT_COMMA)
else
{
lastComma->AsOp()->gtOp2 = newInlinee;
newInlinee = child;
// Here, we assign our node into a fresh temp and then use that temp as the new value.
asg->gtOp2 = AssignStructInlineeToVar(inlinee, retClsHnd);
}
}
else
else if (dst->IsMultiRegLclVar())
{
// Inlinee is not a call, so just create a copy block to the tmp.
src = child;
GenTree* dstAddr = GetStructAsStructPtr(dst);
GenTree* srcAddr = GetStructAsStructPtr(src);
newInlinee = m_compiler->gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false);
// This is no longer a multi-reg assignment -- clear the flag.
dst->AsLclVar()->ClearMultiReg();
}

GenTree* production = m_compiler->gtNewLclvNode(tmpNum, structType);
return m_compiler->gtNewOperNode(GT_COMMA, structType, newInlinee, production);
}

/***************************************************************************************************
* tree - The tree pointer that has one of its child nodes as retExpr.
* child - The inlinee child.
* retClsHnd - The struct class handle of the type of the inlinee.
*
* V04 = call() assignments are okay as we codegen it. Everything else needs to be a copy block or
* would need a temp. For example, a cast(ldobj) will then be, cast(v05 = ldobj, v05); But it is
* a very rare (or impossible) scenario that we'd have a retExpr transform into a ldobj other than
* a lclVar/call. So it is not worthwhile to do pattern matching optimizations like addr(ldobj(op1))
* can just be op1.
*/
void AttachStructInlineeToAsg(GenTree* tree, GenTree* child, CORINFO_CLASS_HANDLE retClsHnd)
//------------------------------------------------------------------------
// AssignStructInlineeToVar: Assign the struct inlinee to a temp local.
//
// Arguments:
// inlinee - The inlinee of the RET_EXPR node
// retClsHnd - The struct class handle of the type of the inlinee.
//
// Return Value:
// Value representing the freshly assigned temp.
//
GenTree* AssignStructInlineeToVar(GenTree* inlinee, CORINFO_CLASS_HANDLE retClsHnd)
{
// We are okay to have:
// 1. V02 = call();
// 2. copyBlk(dstAddr, srcAddr);
assert(tree->gtOper == GT_ASG);
assert(!inlinee->OperIs(GT_MKREFANY, GT_RET_EXPR));

unsigned lclNum = m_compiler->lvaGrabTemp(false DEBUGARG("RetBuf for struct inline return candidates."));
LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
m_compiler->lvaSetStruct(lclNum, retClsHnd, false);

// We have an assignment, we codegen only V05 = call().
if (child->gtOper == GT_CALL && tree->AsOp()->gtOp1->gtOper == GT_LCL_VAR)
// Sink the assignment below any COMMAs: this is required for multi-reg nodes.
GenTree* src = inlinee;
GenTree* lastComma = nullptr;
while (src->OperIs(GT_COMMA))
{
// If it is a multireg return on x64/ux, the local variable should be marked as lvIsMultiRegRet
if (child->AsCall()->HasMultiRegRetVal())
{
unsigned lclNum = tree->AsOp()->gtOp1->AsLclVarCommon()->GetLclNum();
m_compiler->lvaGetDesc(lclNum)->lvIsMultiRegRet = true;
}
return;
lastComma = src;
src = src->AsOp()->gtOp2;
}

GenTree* dstAddr = GetStructAsStructPtr(tree->AsOp()->gtOp1);
GenTree* srcAddr = GetStructAsStructPtr(
(child->gtOper == GT_CALL)
? AssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call.
: child); // Just get the address, if not a call.

tree->ReplaceWith(m_compiler->gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false), m_compiler);
}
// When assigning a multi-register value to a local var, make sure the variable is marked as lvIsMultiRegRet.
if (src->IsMultiRegNode())
{
varDsc->lvIsMultiRegRet = true;
}

/*********************************************************************************
*
* tree - The node which needs to be converted to a struct pointer.
*
* Return the pointer by either __replacing__ the tree node with a suitable pointer
* type or __without replacing__ and just returning a subtree or by __modifying__
* a subtree.
*/
GenTree* GetStructAsStructPtr(GenTree* tree)
{
noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) ||
tree->OperIsSimdOrHWintrinsic() || tree->IsCnsVec());
// GT_CALL, cannot get address of call.
// GT_MKREFANY, inlining should've been aborted due to mkrefany opcode.
// GT_RET_EXPR, cannot happen after fgUpdateInlineReturnExpressionPlaceHolder
GenTree* dst = m_compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
GenTree* asg = m_compiler->gtNewBlkOpNode(dst, src, /* isVolatile */ false, /* isCopyBlock */ true);

switch (tree->OperGet())
// If inlinee was comma, new inlinee is (, , , lcl = inlinee).
if (inlinee->OperIs(GT_COMMA))
{
case GT_BLK:
case GT_OBJ:
case GT_IND:
return tree->AsOp()->gtOp1;

case GT_COMMA:
tree->AsOp()->gtOp2 = GetStructAsStructPtr(tree->AsOp()->gtOp2);
tree->gtType = TYP_BYREF;
return tree;

default:
return m_compiler->gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
lastComma->AsOp()->gtOp2 = asg;
asg = inlinee;
}
}

// Block morphing does not support (promoted) locals under commas, as such, instead of "COMMA(asg, lcl)" we
// do "OBJ(COMMA(asg, ADDR(LCL)))". TODO-1stClassStructs: improve block morphing and delete this workaround.
//
GenTree* lcl = m_compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
GenTree* addr = m_compiler->gtNewOperNode(GT_ADDR, TYP_I_IMPL, lcl);
addr = m_compiler->gtNewOperNode(GT_COMMA, addr->TypeGet(), asg, addr);
GenTree* obj = m_compiler->gtNewObjNode(varDsc->GetLayout(), addr);

return obj;
}
#endif // FEATURE_MULTIREG_RET

//------------------------------------------------------------------------
Expand Down Expand Up @@ -1453,7 +1408,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)

// Replace the call with the return expression. Note that iciCall won't be part of the IR
// but may still be referenced from a GT_RET_EXPR node. We will replace GT_RET_EXPR node
// in fgUpdateInlineReturnExpressionPlaceHolder. At that time we will also update the flags
// in UpdateInlineReturnExpressionPlaceHolder. At that time we will also update the flags
// on the basic block of GT_RET_EXPR node.
if (iciCall->gtInlineCandidateInfo->retExpr->OperGet() == GT_RET_EXPR)
{
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7151,6 +7151,15 @@ GenTree* Compiler::gtNewZeroConNode(var_types type)
zero = gtNewDconNode(0.0);
break;

#ifdef FEATURE_SIMD
case TYP_SIMD8:
case TYP_SIMD12:
case TYP_SIMD16:
case TYP_SIMD32:
zero = gtNewZeroConNode(type, CORINFO_TYPE_FLOAT);
break;
#endif // FEATURE_SIMD

default:
noway_assert(!"Bad type in gtNewZeroConNode");
zero = nullptr;
Expand Down
34 changes: 2 additions & 32 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6918,32 +6918,8 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
// if the root node was an `ASG`, `RET` or `CAST`.
// Return a zero con node to exit morphing of the old trees without asserts
// and forbid POST_ORDER morphing doing something wrong with our call.
var_types callType;
if (varTypeIsStruct(origCallType))
{
CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
Compiler::structPassingKind howToReturnStruct;
callType = getReturnTypeForStruct(retClsHnd, call->GetUnmanagedCallConv(), &howToReturnStruct);
assert((howToReturnStruct != SPK_Unknown) && (howToReturnStruct != SPK_ByReference));
if (howToReturnStruct == SPK_ByValue)
{
callType = TYP_I_IMPL;
}
else if (howToReturnStruct == SPK_ByValueAsHfa || varTypeIsSIMD(callType))
{
callType = TYP_FLOAT;
}
assert((callType != TYP_UNKNOWN) && !varTypeIsStruct(callType));
}
else
{
callType = origCallType;
}
assert((callType != TYP_UNKNOWN) && !varTypeIsStruct(callType));
callType = genActualType(callType);

GenTree* zero = gtNewZeroConNode(callType);
result = fgMorphTree(zero);
var_types zeroType = (origCallType == TYP_STRUCT) ? TYP_INT : genActualType(origCallType);
result = fgMorphTree(gtNewZeroConNode(zeroType));
}
else
{
Expand Down Expand Up @@ -8666,12 +8642,6 @@ GenTree* Compiler::fgMorphConst(GenTree* tree)
// Return value:
// GenTreeLclVar if the obj can be replaced by it, null otherwise.
//
// Notes:
// TODO-CQ: currently this transformation is done only under copy block,
// but it is beneficial to do for each OBJ node. However, `PUT_ARG_STACK`
// for some platforms does not expect struct `LCL_VAR` as a source, so
// it needs more work.
//
GenTreeLclVar* Compiler::fgMorphTryFoldObjAsLclVar(GenTreeObj* obj, bool destroyNodes)
{
if (opts.OptimizationEnabled())
Expand Down
23 changes: 23 additions & 0 deletions src/tests/JIT/Directed/arglist/vararg.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4441,6 +4441,26 @@ static bool TestEchoFourDoubleStructManagedViaAddress()
return equal;
}

// Miscellaneous tests

[MethodImpl(MethodImplOptions.NoInlining)]
static bool TestEchoFourDoubleStructViaParameterAssign()
{
FourDoubleStruct arg = new FourDoubleStruct();
arg.a = 1.0;
arg.b = 2.0;
arg.c = 3.0;
arg.d = 4.0;

FourDoubleStruct returnValue = ManagedNativeVarargTests.TestEchoFourDoubleStructViaParameterAssign(arg, __arglist());
bool equal = arg.a == returnValue.a &&
arg.b == returnValue.b &&
arg.c == returnValue.c &&
arg.d == returnValue.d;

return equal;
}

////////////////////////////////////////////////////////////////////////
// Report Failure
////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -5065,6 +5085,9 @@ static int Main(string[] args)
// Parameter address tests
success = ReportFailure(TestEchoFourDoubleStructManagedViaAddress(), "TestEchoFourDoubleStructManagedViaAddress()", success, 155);

// Miscellaneous tests
success = ReportFailure(TestEchoFourDoubleStructViaParameterAssign(), "TestEchoFourDoubleStructViaParameterAssign()", success, 156);

printf("\n", __arglist());
printf("%d Tests run. %d Passed, %d Failed.\n", __arglist(m_testCount, m_passCount, m_failCount));

Expand Down
Loading

0 comments on commit ebd80ad

Please sign in to comment.