diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4c3f2d1827c7a..806d049d0c463 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6967,6 +6967,7 @@ class Compiler bool optRedundantRelop(BasicBlock* const block); bool optRedundantBranch(BasicBlock* const block); bool optJumpThread(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop); + bool optJumpThreadCheck(BasicBlock* const block, BasicBlock* const domBlock); bool optJumpThreadCore(JumpThreadInfo& jti); bool optReachable(BasicBlock* const fromBlock, BasicBlock* const toBlock, BasicBlock* const excludedBlock); BitVecTraits* optReachableBitVecTraits; diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index 43465c31f48ae..c7f29754b2b61 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -46,30 +46,50 @@ PhaseStatus Compiler::optRedundantBranches() // if (block->bbJumpKind == BBJ_COND) { - madeChanges |= m_compiler->optRedundantRelop(block); + bool madeChangesThisBlock = m_compiler->optRedundantRelop(block); - BasicBlock* bbNext = block->bbNext; - BasicBlock* bbJump = block->bbJumpDest; + BasicBlock* const bbNext = block->bbNext; + BasicBlock* const bbJump = block->bbJumpDest; - madeChanges |= m_compiler->optRedundantBranch(block); + madeChangesThisBlock |= m_compiler->optRedundantBranch(block); - // It's possible that either bbNext or bbJump were unlinked and it's proven - // to be profitable to pay special attention to their successors. - if (madeChanges && (bbNext->countOfInEdges() == 0)) + // If we modified some flow out of block but it's still + // a BBJ_COND, retry; perhaps one of the later optimizations + // we can do has enabled one of the earlier optimizations. + // + if (madeChangesThisBlock && (block->bbJumpKind == BBJ_COND)) + { + JITDUMP("Will retry RBO in " FMT_BB " after partial optimization\n", block->bbNum); + madeChangesThisBlock |= m_compiler->optRedundantBranch(block); + } + + // It's possible that the changed flow into bbNext or bbJump may unblock + // further optimizations there. + // + // Note this misses cascading retries, consider reworking the overall + // strategy here to iterate until closure. + // + if (madeChangesThisBlock && (bbNext->countOfInEdges() == 0)) { for (BasicBlock* succ : bbNext->Succs()) { + JITDUMP("Will retry RBO in " FMT_BB "; pred " FMT_BB " now unreachable\n", succ->bbNum, + bbNext->bbNum); m_compiler->optRedundantBranch(succ); } } - if (madeChanges && (bbJump->countOfInEdges() == 0)) + if (madeChangesThisBlock && (bbJump->countOfInEdges() == 0)) { for (BasicBlock* succ : bbJump->Succs()) { + JITDUMP("Will retry RBO in " FMT_BB "; pred " FMT_BB " now unreachable\n", succ->bbNum, + bbNext->bbNum); m_compiler->optRedundantBranch(succ); } } + + madeChanges |= madeChangesThisBlock; } } }; @@ -405,6 +425,8 @@ void Compiler::optRelopImpliesRelop(RelopImplicationInfo* rii) // bool Compiler::optRedundantBranch(BasicBlock* const block) { + JITDUMP("\n--- Trying RBO in " FMT_BB " ---\n", block->bbNum); + Statement* const stmt = block->lastStmt(); if (stmt == nullptr) @@ -722,93 +744,20 @@ struct JumpThreadInfo }; //------------------------------------------------------------------------ -// optJumpThread: try and bypass the current block by rerouting -// flow from predecessors directly to successors. +// optJumpThreadCheck: see if block is suitable for jump threading. // // Arguments: -// block - block with branch to optimize -// domBlock - a dominating block that has an equivalent branch -// domIsSameRelop - if true, dominating block does the same compare; -// if false, dominating block does a reverse compare +// block - block in question +// domBlock - dom block used in inferencing (if any) // -// Returns: -// True if the branch was optimized. -// -// Notes: -// -// Conceptually this just transforms flow as follows: -// -// domBlock domBlock -// / | / | -// Ts Fs Ts Fs True/False successor -// .... .... .... .... -// Tp Fp Tp Fp True/False pred -// \ / | | -// \ / | | -// block ==> | | -// / \ | | -// / \ | | -// Tt Ft Tt Ft True/false target -// -// However we may try to re-purpose block, and so end up producing flow more like this: -// -// domBlock domBlock -// / | / | -// Ts Fs Ts Fs True/False successor -// .... .... .... .... -// Tp Fp Tp Fp True/False pred -// \ / | | -// \ / | | -// block ==> | block (repurposed) -// / \ | | -// / \ | | -// Tt Ft Tt Ft True/false target -// -bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop) +bool Compiler::optJumpThreadCheck(BasicBlock* const block, BasicBlock* const domBlock) { - assert(block->bbJumpKind == BBJ_COND); - assert(domBlock->bbJumpKind == BBJ_COND); - if (fgCurBBEpochSize != (fgBBNumMax + 1)) { JITDUMP("Looks like we've added a new block (e.g. during optLoopHoist) since last renumber, so no threading\n"); return false; } - // If the dominating block is not the immediate dominator - // we might need to duplicate a lot of code to thread - // the jumps. See if that's the case. - // - const bool isIDom = domBlock == block->bbIDom; - if (!isIDom) - { - // Walk up the dom tree until we hit dom block. - // - // If none of the doms in the stretch are BBJ_COND, - // then we must have already optimized them, and - // so should not have to duplicate code to thread. - // - BasicBlock* idomBlock = block->bbIDom; - while ((idomBlock != nullptr) && (idomBlock != domBlock)) - { - if (idomBlock->bbJumpKind == BBJ_COND) - { - JITDUMP(" -- " FMT_BB " not closest branching dom, so no threading\n", idomBlock->bbNum); - return false; - } - JITDUMP(" -- bypassing %sdom " FMT_BB " as it was already optimized\n", - (idomBlock == block->bbIDom) ? "i" : "", idomBlock->bbNum); - idomBlock = idomBlock->bbIDom; - } - - // If we didn't bail out above, we should have reached domBlock. - // - assert(idomBlock == domBlock); - } - - JITDUMP("Both successors of %sdom " FMT_BB " reach " FMT_BB " -- attempting jump threading\n", isIDom ? "i" : "", - domBlock->bbNum, block->bbNum); - // If the block is the first block of try-region, then skip jump threading if (bbIsTryBeg(block)) { @@ -824,6 +773,9 @@ bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock // This only blocks jump threading in a small number of cases. // Revisit once we can ensure that loop headers are not critical blocks. // + // Likewise we can't properly update the loop table if we thread the entry block. + // Not clear how much impact this has. + // for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++) { const LoopDsc& loop = optLoopTable[loopNum]; @@ -838,6 +790,12 @@ bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock JITDUMP(FMT_BB " is the header for " FMT_LP "; no threading\n", block->bbNum, loopNum); return false; } + + if (block == loop.lpEntry) + { + JITDUMP(FMT_BB " is the entry for " FMT_LP "; no threading\n", block->bbNum, loopNum); + return false; + } } // Since flow is going to bypass block, make sure there @@ -874,7 +832,7 @@ bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock // same EH region, as we might not be able to fully // describe control flow between them. // - if (BasicBlock::sameEHRegion(block, domBlock)) + if ((domBlock != nullptr) && BasicBlock::sameEHRegion(block, domBlock)) { // We will ignore the side effect on this tree. // @@ -888,6 +846,97 @@ bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock } } + return true; +} + +//------------------------------------------------------------------------ +// optJumpThreadDom: try and bypass the current block by rerouting +// flow from predecessors directly to successors. +// +// Arguments: +// block - block with branch to optimize +// domBlock - a dominating block that has an equivalent branch +// domIsSameRelop - if true, dominating block does the same compare; +// if false, dominating block does a reverse compare +// +// Returns: +// True if the branch was optimized. +// +// Notes: +// +// Conceptually this just transforms flow as follows: +// +// domBlock domBlock +// / | / | +// Ts Fs Ts Fs True/False successor +// .... .... .... .... +// Tp Fp Tp Fp True/False pred +// \ / | | +// \ / | | +// block ==> | | +// / \ | | +// / \ | | +// Tt Ft Tt Ft True/false target +// +// However we may try to re-purpose block, and so end up producing flow more like this: +// +// domBlock domBlock +// / | / | +// Ts Fs Ts Fs True/False successor +// .... .... .... .... +// Tp Fp Tp Fp True/False pred +// \ / | | +// \ / | | +// block ==> | block (repurposed) +// / \ | | +// / \ | | +// Tt Ft Tt Ft True/false target +// +bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop) +{ + assert(block->bbJumpKind == BBJ_COND); + assert(domBlock->bbJumpKind == BBJ_COND); + + // If the dominating block is not the immediate dominator + // we might need to duplicate a lot of code to thread + // the jumps. See if that's the case. + // + const bool isIDom = domBlock == block->bbIDom; + if (!isIDom) + { + // Walk up the dom tree until we hit dom block. + // + // If none of the doms in the stretch are BBJ_COND, + // then we must have already optimized them, and + // so should not have to duplicate code to thread. + // + BasicBlock* idomBlock = block->bbIDom; + while ((idomBlock != nullptr) && (idomBlock != domBlock)) + { + if (idomBlock->bbJumpKind == BBJ_COND) + { + JITDUMP(" -- " FMT_BB " not closest branching dom, so no threading\n", idomBlock->bbNum); + return false; + } + JITDUMP(" -- bypassing %sdom " FMT_BB " as it was already optimized\n", + (idomBlock == block->bbIDom) ? "i" : "", idomBlock->bbNum); + idomBlock = idomBlock->bbIDom; + } + + // If we didn't bail out above, we should have reached domBlock. + // + assert(idomBlock == domBlock); + } + + JITDUMP("Both successors of %sdom " FMT_BB " reach " FMT_BB " -- attempting jump threading\n", isIDom ? "i" : "", + domBlock->bbNum, block->bbNum); + + const bool check = optJumpThreadCheck(block, domBlock); + if (!check) + { + return false; + } + // In order to optimize we have to be able to determine which predecessors // are correlated exclusively with a true value for block's relop, and which // are correlated exclusively with a false value (aka true preds and false preds). @@ -1034,7 +1083,7 @@ bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock // optJumpThreadCore: restructure block flow based on jump thread information // // Arguments: -// jit - information on how to jump thread this block +// jti - information on how to jump thread this block // // Returns: // True if the branch was optimized. @@ -1055,27 +1104,52 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti) return false; } + bool modifiedFlow = false; + if ((jti.m_numAmbiguousPreds > 0) && (jti.m_fallThroughPred != nullptr)) { - // Treat the fall through pred as an ambiguous pred. - JITDUMP(FMT_BB " has both ambiguous preds and a fall through pred\n", jti.m_block->bbNum); - JITDUMP("Treating fall through pred " FMT_BB " as an ambiguous pred\n", jti.m_fallThroughPred->bbNum); + // If fall through pred is BBJ_NONE, and we have only (ambiguous, true) or (ambiguous, false) preds, + // we can change the fall through to BBJ_ALWAYS. + // + const bool fallThroughIsTruePred = BlockSetOps::IsMember(this, jti.m_truePreds, jti.m_fallThroughPred->bbNum); - if (BlockSetOps::IsMember(this, jti.m_truePreds, jti.m_fallThroughPred->bbNum)) + if ((jti.m_fallThroughPred->bbJumpKind == BBJ_NONE) && ((fallThroughIsTruePred && (jti.m_numFalsePreds == 0)) || + (!fallThroughIsTruePred && (jti.m_numTruePreds == 0)))) { - BlockSetOps::RemoveElemD(this, jti.m_truePreds, jti.m_fallThroughPred->bbNum); - assert(jti.m_numTruePreds > 0); - jti.m_numTruePreds--; + JITDUMP(FMT_BB " has ambiguous preds and a (%s) fall through pred and no (%s) preds.\n" + "Converting fall through pred " FMT_BB " to BBJ_ALWAYS\n", + jti.m_block->bbNum, fallThroughIsTruePred ? "true" : "false", + fallThroughIsTruePred ? "false" : "true", jti.m_fallThroughPred->bbNum); + + // Possibly defer this until after early out below. + // + jti.m_fallThroughPred->bbJumpKind = BBJ_ALWAYS; + jti.m_fallThroughPred->bbJumpDest = jti.m_block; + modifiedFlow = true; } else { - assert(jti.m_numFalsePreds > 0); - jti.m_numFalsePreds--; + // Treat the fall through pred as an ambiguous pred. + JITDUMP(FMT_BB " has both ambiguous preds and a fall through pred\n", jti.m_block->bbNum); + JITDUMP("Treating fall through pred " FMT_BB " as an ambiguous pred\n", jti.m_fallThroughPred->bbNum); + + if (fallThroughIsTruePred) + { + BlockSetOps::RemoveElemD(this, jti.m_truePreds, jti.m_fallThroughPred->bbNum); + assert(jti.m_numTruePreds > 0); + jti.m_numTruePreds--; + } + else + { + assert(jti.m_numFalsePreds > 0); + jti.m_numFalsePreds--; + } + + assert(!(BlockSetOps::IsMember(this, jti.m_ambiguousPreds, jti.m_fallThroughPred->bbNum))); + BlockSetOps::AddElemD(this, jti.m_ambiguousPreds, jti.m_fallThroughPred->bbNum); + jti.m_numAmbiguousPreds++; } - assert(!(BlockSetOps::IsMember(this, jti.m_ambiguousPreds, jti.m_fallThroughPred->bbNum))); - BlockSetOps::AddElemD(this, jti.m_ambiguousPreds, jti.m_fallThroughPred->bbNum); - jti.m_numAmbiguousPreds++; jti.m_fallThroughPred = nullptr; } @@ -1086,6 +1160,7 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti) // This is possible, but also should be rare. // JITDUMP(FMT_BB " now only has ambiguous preds, not jump threading\n", jti.m_block->bbNum); + assert(!modifiedFlow); return false; } @@ -1162,7 +1237,6 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti) // Note we should not be altering flow for the fall-through pred. // assert(predBlock != jti.m_fallThroughPred); - assert(predBlock->bbNext != jti.m_block); if (isTruePred) { @@ -1170,9 +1244,16 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti) " implies predicate true; we can safely redirect flow to be " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, jti.m_block->bbNum, predBlock->bbNum, jti.m_trueTarget->bbNum); - fgRemoveRefPred(jti.m_block, predBlock); - fgReplaceJumpTarget(predBlock, jti.m_trueTarget, jti.m_block); - fgAddRefPred(jti.m_trueTarget, predBlock); + if (predBlock->bbJumpKind == BBJ_SWITCH) + { + fgReplaceSwitchJumpTarget(predBlock, jti.m_trueTarget, jti.m_block); + } + else + { + fgRemoveRefPred(jti.m_block, predBlock); + fgReplaceJumpTarget(predBlock, jti.m_trueTarget, jti.m_block); + fgAddRefPred(jti.m_trueTarget, predBlock); + } } else { @@ -1180,9 +1261,16 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti) " implies predicate false; we can safely redirect flow to be " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, jti.m_block->bbNum, predBlock->bbNum, jti.m_falseTarget->bbNum); - fgRemoveRefPred(jti.m_block, predBlock); - fgReplaceJumpTarget(predBlock, jti.m_falseTarget, jti.m_block); - fgAddRefPred(jti.m_falseTarget, predBlock); + if (predBlock->bbJumpKind == BBJ_SWITCH) + { + fgReplaceSwitchJumpTarget(predBlock, jti.m_falseTarget, jti.m_block); + } + else + { + fgRemoveRefPred(jti.m_block, predBlock); + fgReplaceJumpTarget(predBlock, jti.m_falseTarget, jti.m_block); + fgAddRefPred(jti.m_falseTarget, predBlock); + } } }