Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VPlan] Compute cost for most opcodes in VPWidenRecipe (NFCI). #98764

Merged
merged 5 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7279,7 +7279,8 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
InstructionCost Cost = 0;
LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(),
LLVMCtx, CM);

// Cost modeling for inductions is inaccurate in the legacy cost model
// compared to the recipes that are generated. To match here initially during
Expand Down
15 changes: 11 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -736,14 +736,16 @@ class VPLiveOut : public VPUser {
/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
const TargetLibraryInfo &TLI;
VPTypeAnalysis Types;
LLVMContext &LLVMCtx;
LoopVectorizationCostModel &CM;
SmallPtrSet<Instruction *, 8> SkipCostComputation;

VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy,
LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
: TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
Type *CanIVTy, LLVMContext &LLVMCtx,
LoopVectorizationCostModel &CM)
: TTI(TTI), TLI(TLI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}

/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
Expand Down Expand Up @@ -862,7 +864,8 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
protected:
/// Compute the cost of this recipe using the legacy cost model and the
/// underlying instructions.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
virtual InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: the comment can be refined as the legacy cost model can be overriden.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adjusted the comment, thanks!

};

// Helper macro to define common classof implementations for recipes.
Expand Down Expand Up @@ -1423,6 +1426,10 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
/// Produce widened copies of all Ingredients.
void execute(VPTransformState &State) override;

/// Return the cost of this VPWidenRecipe.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Return" >> "Compute and return"

InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

unsigned getOpcode() const { return Opcode; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down
74 changes: 74 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,80 @@ void VPWidenRecipe::execute(VPTransformState &State) {
#endif
}

InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
switch (Opcode) {
case Instruction::FNeg: {
Type *VectorTy =
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
return Ctx.TTI.getArithmeticInstrCost(
Opcode, VectorTy, CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
}

case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
// More complex computation, let the legacy cost-model handle this for now.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"// TODO: ..."

return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
VPValue *Op2 = getOperand(1);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: RHS?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed, thanks!

// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this are shifts on x86.
TargetTransformInfo::OperandValueInfo Op2Info = {
TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
if (Op2->isLiveIn())
Op2Info = Ctx.TTI.getOperandInfo(Op2->getLiveInIRValue());

if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
getOperand(1)->isDefinedOutsideVectorRegions())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"getOperand(1)" >> "RHS" (or "Operand2")

Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
Type *VectorTy =
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

VPWidenRecipe must have an underlying value, which must be an Instruction?


SmallVector<const Value *, 4> Operands;
if (CtxI)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this branch always taken?

Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
return Ctx.TTI.getArithmeticInstrCost(
Opcode, VectorTy, CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
Op2Info, Operands, CtxI, &Ctx.TLI);
}
case Instruction::Freeze: {
// This opcode is unknown. Assume that it is the same as 'mul'.
Type *VectorTy =
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
}
case Instruction::ICmp:
case Instruction::FCmp: {
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
CostKind, CtxI);
}
default:
llvm_unreachable("Unsupported opcode for instruction");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't want some kind of fall back?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think VPWidenRecipe recipes should only be created for the matched opcodes above and if there are any missed cases this should highlight them via a crash, similar to how we handle similar cases elsewhere.

}
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
Expand Down
Loading