diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 1e7695baab62cc..6453b3070ceb8d 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -202,8 +202,8 @@ class DataAggregator : public DataReader { /// Return a vector of offsets corresponding to a trace in a function /// if the trace is valid, std::nullopt otherwise. std::optional, 16>> - getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, uint64_t To, - uint64_t Count = 1) const; + getFallthroughsInTrace(BinaryFunction &BF, const LBREntry &First, + const LBREntry &Second, uint64_t Count = 1) const; /// Record external entry into the function \p BF. /// @@ -268,8 +268,9 @@ class DataAggregator : public DataReader { /// Register a \p Branch. bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds); - /// Register a trace between two addresses. - bool doTrace(const uint64_t From, const uint64_t To, uint64_t Count = 1); + /// Register a trace between two LBR entries supplied in execution order. + bool doTrace(const LBREntry &First, const LBREntry &Second, + uint64_t Count = 1); /// Parser helpers /// Return false if we exhausted our parser buffer and finished parsing diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index dbd0ed07c7c1d9..72905d0ecf6a05 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -805,8 +805,11 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true); // Record returns as call->call continuation fall-through. - if (IsReturn) - return doTrace(To - 1, To, Count); + if (IsReturn) { + LBREntry First{To - 1, To - 1, false}; + LBREntry Second{To, To, false}; + return doTrace(First, Second, Count); + } BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false); if (!FromFunc && !ToFunc) @@ -821,23 +824,24 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); } -bool DataAggregator::doTrace(const uint64_t From, const uint64_t To, +bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, uint64_t Count) { - BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From); - BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To); + BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To); + BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); if (!FromFunc || !ToFunc) { LLVM_DEBUG({ dbgs() << "Out of range trace starting in "; if (FromFunc) dbgs() << formatv("{0} @ {1:x}", *FromFunc, - From - FromFunc->getAddress()); + First.To - FromFunc->getAddress()); else - dbgs() << Twine::utohexstr(From); + dbgs() << Twine::utohexstr(First.To); dbgs() << " and ending in "; if (ToFunc) - dbgs() << formatv("{0} @ {1:x}", *ToFunc, To - ToFunc->getAddress()); + dbgs() << formatv("{0} @ {1:x}", *ToFunc, + Second.From - ToFunc->getAddress()); else - dbgs() << Twine::utohexstr(To); + dbgs() << Twine::utohexstr(Second.From); dbgs() << '\n'; }); NumLongRangeTraces += Count; @@ -847,30 +851,32 @@ bool DataAggregator::doTrace(const uint64_t From, const uint64_t To, NumInvalidTraces += Count; LLVM_DEBUG({ dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() - << formatv(" @ {0:x}", From - FromFunc->getAddress()) + << formatv(" @ {0:x}", First.To - FromFunc->getAddress()) << " and ending in " << ToFunc->getPrintName() - << formatv(" @ {0:x}\n", To - ToFunc->getAddress()); + << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress()); }); return false; } std::optional FTs = - BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), From, To) - : getFallthroughsInTrace(*FromFunc, From, To, Count); + BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To, + Second.From) + : getFallthroughsInTrace(*FromFunc, First, Second, Count); if (!FTs) { - LLVM_DEBUG(dbgs() << "Invalid trace starting in " - << FromFunc->getPrintName() << " @ " - << Twine::utohexstr(From - FromFunc->getAddress()) - << " and ending in " << ToFunc->getPrintName() << " @ " - << ToFunc->getPrintName() << " @ " - << Twine::utohexstr(To - ToFunc->getAddress()) << '\n'); + LLVM_DEBUG( + dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() + << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) + << " and ending in " << ToFunc->getPrintName() << " @ " + << ToFunc->getPrintName() << " @ " + << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); NumInvalidTraces += Count; return false; } LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " - << FromFunc->getPrintName() << ":" << Twine::utohexstr(From) - << " to " << Twine::utohexstr(To) << ".\n"); + << FromFunc->getPrintName() << ":" + << Twine::utohexstr(First.To) << " to " + << Twine::utohexstr(Second.From) << ".\n"); BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc); for (auto [From, To] : *FTs) { if (BAT) { @@ -884,8 +890,10 @@ bool DataAggregator::doTrace(const uint64_t From, const uint64_t To, } std::optional, 16>> -DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, - uint64_t To, uint64_t Count) const { +DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, + const LBREntry &FirstLBR, + const LBREntry &SecondLBR, + uint64_t Count) const { SmallVector, 16> Branches; BinaryContext &BC = BF.getBinaryContext(); @@ -896,8 +904,8 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, assert(BF.hasCFG() && "can only record traces in CFG state"); // Offsets of the trace within this function. - From = From - BF.getAddress(); - To = To - BF.getAddress(); + const uint64_t From = FirstLBR.To - BF.getAddress(); + const uint64_t To = SecondLBR.From - BF.getAddress(); if (From > To) return std::nullopt; @@ -908,6 +916,24 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, if (!FromBB || !ToBB) return std::nullopt; + // Adjust FromBB if the first LBR is a return from the last instruction in + // the previous block (that instruction should be a call). + if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && + !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { + const BinaryBasicBlock *PrevBB = + BF.getLayout().getBlock(FromBB->getIndex() - 1); + if (PrevBB->getSuccessor(FromBB->getLabel())) { + const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); + if (Instr && BC.MIB->isCall(*Instr)) + FromBB = PrevBB; + else + LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR + << '\n'); + } else { + LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); + } + } + // Fill out information for fall-through edges. The From and To could be // within the same basic block, e.g. when two call instructions are in the // same block. In this case we skip the processing. @@ -924,8 +950,8 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, // Check for bad LBRs. if (!BB->getSuccessor(NextBB->getLabel())) { LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" - << " " << From << '\n' - << " " << To << '\n'); + << " " << FirstLBR << '\n' + << " " << SecondLBR << '\n'); return std::nullopt; } @@ -1582,11 +1608,16 @@ void DataAggregator::processBranchEvents() { NamedRegionTimer T("processBranch", "Processing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); - for (const auto &[Loc, Info] : FallthroughLBRs) { + for (const auto &AggrLBR : FallthroughLBRs) { + const Trace &Loc = AggrLBR.first; + const FTInfo &Info = AggrLBR.second; + LBREntry First{Loc.From, Loc.From, false}; + LBREntry Second{Loc.To, Loc.To, false}; if (Info.InternCount) - doTrace(Loc.From, Loc.To, Info.InternCount); + doTrace(First, Second, Info.InternCount); if (Info.ExternCount) { - doTrace(0, Loc.To, Info.ExternCount); + First.From = 0; + doTrace(First, Second, Info.ExternCount); } } @@ -1750,16 +1781,21 @@ void DataAggregator::processPreAggregated() { TimerGroupName, TimerGroupDesc, opts::TimeAggregator); uint64_t NumTraces = 0; - for (const auto &[From, To, Count, Mispreds, Type] : AggregatedLBRs) { - bool IsExternalOrigin = Type == AggregatedLBREntry::FT_EXTERNAL_ORIGIN; - switch (Type) { + for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { + switch (AggrEntry.EntryType) { case AggregatedLBREntry::BRANCH: - doBranch(From.Offset, To.Offset, Count, Mispreds); + doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, + AggrEntry.Mispreds); break; case AggregatedLBREntry::FT: case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { - doTrace(IsExternalOrigin ? 0 : From.Offset, To.Offset, Count); - NumTraces += Count; + LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT + ? AggrEntry.From.Offset + : 0, + AggrEntry.From.Offset, false}; + LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; + doTrace(First, Second, AggrEntry.Count); + NumTraces += AggrEntry.Count; break; } }