Skip to content

Commit

Permalink
[VPlan] Compute cost for most opcodes in VPWidenRecipe (NFCI).
Browse files Browse the repository at this point in the history
  • Loading branch information
fhahn committed Jul 14, 2024
1 parent 4120907 commit 5527585
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 5 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7279,7 +7279,8 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
InstructionCost Cost = 0;
LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(),
LLVMCtx, CM);

// Cost modeling for inductions is inaccurate in the legacy cost model
// compared to the recipes that are generated. To match here initially during
Expand Down
15 changes: 11 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -736,14 +736,16 @@ class VPLiveOut : public VPUser {
/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
const TargetLibraryInfo &TLI;
VPTypeAnalysis Types;
LLVMContext &LLVMCtx;
LoopVectorizationCostModel &CM;
SmallPtrSet<Instruction *, 8> SkipCostComputation;

VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy,
LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
: TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
Type *CanIVTy, LLVMContext &LLVMCtx,
LoopVectorizationCostModel &CM)
: TTI(TTI), TLI(TLI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}

/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
Expand Down Expand Up @@ -862,7 +864,8 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
protected:
/// Compute the cost of this recipe using the legacy cost model and the
/// underlying instructions.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
virtual InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const;
};

// Helper macro to define common classof implementations for recipes.
Expand Down Expand Up @@ -1423,6 +1426,10 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
/// Produce widened copies of all Ingredients.
void execute(VPTransformState &State) override;

/// Return the cost of this VPWidenRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

unsigned getOpcode() const { return Opcode; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down
74 changes: 74 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,80 @@ void VPWidenRecipe::execute(VPTransformState &State) {
#endif
}

InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
switch (Opcode) {
case Instruction::FNeg: {
Type *VectorTy =
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
return Ctx.TTI.getArithmeticInstrCost(
Opcode, VectorTy, CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
}

case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
// More complex computation, let the legacy cost-model handle this for now.
return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
VPValue *Op2 = getOperand(1);
// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this are shifts on x86.
TargetTransformInfo::OperandValueInfo Op2Info = {
TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
if (Op2->isLiveIn())
Op2Info = Ctx.TTI.getOperandInfo(Op2->getLiveInIRValue());

if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
getOperand(1)->isDefinedOutsideVectorRegions())
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
Type *VectorTy =
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());

SmallVector<const Value *, 4> Operands;
if (CtxI)
Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
return Ctx.TTI.getArithmeticInstrCost(
Opcode, VectorTy, CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
Op2Info, Operands, CtxI, &Ctx.TLI);
}
case Instruction::Freeze: {
// This opcode is unknown. Assume that it is the same as 'mul'.
Type *VectorTy =
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
}
case Instruction::ICmp:
case Instruction::FCmp: {
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
CostKind, CtxI);
}
default:
llvm_unreachable("Unsupported opcode for instruction");
}
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
Expand Down

0 comments on commit 5527585

Please sign in to comment.