From f5453680166e64b94baeeecc6200903cb8362e5a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 31 Jan 2024 12:26:48 +0100 Subject: [PATCH] JIT: Establish loop invariant base case based on IR (#97182) Avoid having a cross-phase dependency on loop inversion here. Instead, validate that the condition is an actual zero-trip test. --- src/coreclr/jit/compiler.h | 4 +- src/coreclr/jit/flowgraph.cpp | 138 ++++++++++++++++-- src/coreclr/jit/gentree.h | 1 - src/coreclr/jit/loopcloning.cpp | 11 -- src/coreclr/jit/optimizer.cpp | 5 - .../JitBlue/Runtime_95315/Runtime_95315.cs | 1 + 6 files changed, 132 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 069e5608460d4..379eaf065e266 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1958,7 +1958,7 @@ struct NaturalLoopIterInfo unsigned IterVar = BAD_VAR_NUM; #ifdef DEBUG - // Tree that initializes induction varaible outside the loop. + // Tree that initializes induction variable outside the loop. // Only valid if HasConstInit is true. GenTree* InitTree = nullptr; #endif @@ -2096,6 +2096,8 @@ class FlowGraphNaturalLoop void MatchInit(NaturalLoopIterInfo* info, BasicBlock* initBlock, GenTree* init); bool MatchLimit(unsigned iterVar, GenTree* test, NaturalLoopIterInfo* info); bool CheckLoopConditionBaseCase(BasicBlock* initBlock, NaturalLoopIterInfo* info); + bool IsZeroTripTest(BasicBlock* initBlock, NaturalLoopIterInfo* info); + bool InitBlockEntersLoopOnTrue(BasicBlock* initBlock); template static bool EvaluateRelop(T op1, T op2, genTreeOps oper); public: diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index f5ce79df20d4f..9ee019f7822ce 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -5029,7 +5029,7 @@ bool FlowGraphNaturalLoop::AnalyzeIteration(NaturalLoopIterInfo* info) if (!CheckLoopConditionBaseCase(initBlock, info)) { - JITDUMP(" Loop condition is not always true\n"); + JITDUMP(" Loop condition may not be true on the first iteration\n"); return false; } @@ -5263,11 +5263,10 @@ bool FlowGraphNaturalLoop::EvaluateRelop(T op1, T op2, genTreeOps oper) //------------------------------------------------------------------------ // CheckLoopConditionBaseCase: Verify that the loop condition is true when the -// loop is entered (or validated immediately on entry). +// loop is entered. // // Returns: -// True if we could prove that the condition is true at all interesting -// points in the loop. +// True if we could prove that the condition is true on entry. // // Remarks: // Currently handles the following cases: @@ -5306,18 +5305,137 @@ bool FlowGraphNaturalLoop::CheckLoopConditionBaseCase(BasicBlock* initBlock, Nat } // Do we have a zero-trip test? - if (initBlock->KindIs(BBJ_COND)) + if (initBlock->KindIs(BBJ_COND) && IsZeroTripTest(initBlock, info)) { - GenTree* enteringJTrue = initBlock->lastStmt()->GetRootNode(); - assert(enteringJTrue->OperIs(GT_JTRUE)); - if (enteringJTrue->gtGetOp1()->OperIsCompare() && ((enteringJTrue->gtGetOp1()->gtFlags & GTF_RELOP_ZTT) != 0)) + return true; + } + + return false; +} + +//------------------------------------------------------------------------ +// IsZeroTripTest: Check whether `initBlock`, a BBJ_COND block that enters the +// loop in one case and not in the other, implies that the loop invariant is +// true on entry. +// +// Returns: +// True if we could prove that the loop invariant is true on entry through +// "initBlock". +// +bool FlowGraphNaturalLoop::IsZeroTripTest(BasicBlock* initBlock, NaturalLoopIterInfo* info) +{ + assert(initBlock->KindIs(BBJ_COND)); + GenTree* enteringJTrue = initBlock->lastStmt()->GetRootNode(); + assert(enteringJTrue->OperIs(GT_JTRUE)); + GenTree* relop = enteringJTrue->gtGetOp1(); + if (!relop->OperIsCmpCompare()) + { + return false; + } + + // Technically optExtractInitTestIncr only handles the "false" + // entry case, and preheader creation should ensure that that's the + // only time we'll see a BBJ_COND init block. However, it does not + // hurt to let this logic be correct by construction. + bool enterOnTrue = InitBlockEntersLoopOnTrue(initBlock); + + JITDUMP(" Init block " FMT_BB " enters the loop when condition [%06u] evaluates to %s\n", initBlock->bbNum, + Compiler::dspTreeID(relop), enterOnTrue ? "true" : "false"); + + GenTree* limitCandidate; + genTreeOps oper; + + if (relop->gtGetOp1()->OperIsScalarLocal() && (relop->gtGetOp1()->AsLclVarCommon()->GetLclNum() == info->IterVar)) + { + JITDUMP(" op1 is the iteration variable\n"); + oper = relop->gtOper; + limitCandidate = relop->gtGetOp2(); + } + else if (relop->gtGetOp2()->OperIsScalarLocal() && + (relop->gtGetOp2()->AsLclVarCommon()->GetLclNum() == info->IterVar)) + { + JITDUMP(" op2 is the iteration variable\n"); + oper = GenTree::SwapRelop(relop->gtOper); + limitCandidate = relop->gtGetOp1(); + } + else + { + JITDUMP(" Relop does not involve iteration variable\n"); + return false; + } + + if (!enterOnTrue) + { + oper = GenTree::ReverseRelop(oper); + } + + // Here we want to prove that [iterVar] [oper] [limitCandidate] implies + // [iterVar] [info->IterOper()] [info->Limit()]. Currently we just do the + // simple exact checks, but this could be improved. Note that using + // `GenTree::Compare` for the limits is ok for a "same value" check for the + // limited shapes of limits we recognize. + // + if ((relop->IsUnsigned() != info->TestTree->IsUnsigned()) || (oper != info->TestOper()) || + !GenTree::Compare(limitCandidate, info->Limit())) + { + JITDUMP(" Condition guarantees V%02u %s%s [%06u], but invariant requires V%02u %s%s [%06u]\n", info->IterVar, + relop->IsUnsigned() ? "(uns) " : "", GenTree::OpName(oper), Compiler::dspTreeID(limitCandidate), + info->IterVar, info->TestTree->IsUnsigned() ? "(uns) " : "", GenTree::OpName(info->TestOper()), + Compiler::dspTreeID(info->Limit())); + return false; + } + + JITDUMP(" Condition is established before entry at [%06u]\n", Compiler::dspTreeID(enteringJTrue->gtGetOp1())); + return true; +} + +//------------------------------------------------------------------------ +// InitBlockEntersLoopOnTrue: Determine whether a BBJ_COND init block enters the +// loop in the false or true case. +// +// Parameters: +// initBlock - A BBJ_COND block that is assumed to dominate the loop, and +// only enter the loop in one of the two cases. +// +// Returns: +// True if the loop is entered if the condition evaluates to true; otherwise false. +// +// Remarks: +// Handles only limited cases (optExtractInitTestIncr ensures that we see +// only limited cases). +// +bool FlowGraphNaturalLoop::InitBlockEntersLoopOnTrue(BasicBlock* initBlock) +{ + assert(initBlock->KindIs(BBJ_COND)); + + if (initBlock->FalseTargetIs(GetHeader())) + { + return false; + } + + if (initBlock->TrueTargetIs(GetHeader())) + { + return true; + } + + // `optExtractInitTestIncr` may look at preds of preds to find an init + // block, so try a little bit harder. Today this always happens since we + // always have preheaders created in the places we call + // FlowGraphNaturalLoop::AnalyzeIteration. + for (FlowEdge* enterEdge : EntryEdges()) + { + BasicBlock* entering = enterEdge->getSourceBlock(); + if (initBlock->FalseTargetIs(entering)) + { + return false; + } + if (initBlock->TrueTargetIs(entering)) { - JITDUMP(" Condition is established before entry at [%06u]\n", - Compiler::dspTreeID(enteringJTrue->gtGetOp1())); return true; } } + assert(!"Could not find init block enter side"); return false; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 89e924220bdd0..44fbdf76837b5 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -503,7 +503,6 @@ enum GenTreeFlags : unsigned int GTF_RELOP_NAN_UN = 0x80000000, // GT_ -- Is branch taken if ops are NaN? GTF_RELOP_JMP_USED = 0x40000000, // GT_ -- result of compare used for jump or ?: - GTF_RELOP_ZTT = 0x08000000, // GT_ -- Loop test cloned for converting while-loops into do-while // with explicit "loop test" in the header block. GTF_RET_MERGED = 0x80000000, // GT_RETURN -- This is a return generated during epilog merging. diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index bb7bb5133f1b6..23848154bcff1 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1841,17 +1841,6 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c return false; } - // TODO-Quirk: Can be removed, loop invariant is validated by - // `FlowGraphNaturalLoop::AnalyzeIteration`. - if (!iterInfo->TestTree->OperIsCompare() || ((iterInfo->TestTree->gtFlags & GTF_RELOP_ZTT) == 0)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP - ". Loop inversion NOT present, loop test [%06u] may not protect " - "entry from head.\n", - loop->GetIndex(), iterInfo->TestTree->gtTreeID); - return false; - } - #ifdef DEBUG const unsigned ivLclNum = iterInfo->IterVar; GenTree* const op1 = iterInfo->Iterator(); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 6d072cf1f60c2..54ca4a5c77b80 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2156,11 +2156,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) assert(originalCompareTree->OperIsCompare()); assert(clonedCompareTree->OperIsCompare()); - // Flag compare and cloned copy so later we know this loop - // has a proper zero trip test. - originalCompareTree->gtFlags |= GTF_RELOP_ZTT; - clonedCompareTree->gtFlags |= GTF_RELOP_ZTT; - // The original test branches to remain in the loop. The // new cloned test will branch to avoid the loop. So the // cloned compare needs to reverse the branch condition. diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_95315/Runtime_95315.cs b/src/tests/JIT/Regression/JitBlue/Runtime_95315/Runtime_95315.cs index 3f15e179afb9f..ead49f1510967 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_95315/Runtime_95315.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_95315/Runtime_95315.cs @@ -11,6 +11,7 @@ public class Runtime_95315 [Fact] public static void TestEntryPoint() { + // Make sure 'Bar' tiers up completely... for (int i = 0; i < 4; i++) { for (int j = 0; j < 200; j++)