diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 58ef665e86d65e..27e99694aafd30 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4354,8 +4354,8 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks( SmallVector InvalidCosts; for (const auto &Plan : VPlans) { for (ElementCount VF : Plan->vectorFactors()) { - VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, - CM); + VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), + LLVMCtx, CM); auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { for (auto &R : *VPBB) { @@ -7062,7 +7062,8 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, ElementCount VF) const { InstructionCost Cost = 0; LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext(); - VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM); + VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), + LLVMCtx, CM); // Cost modeling for inductions is inaccurate in the legacy cost model // compared to the recipes that are generated. To match here initially during diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 016ad75c21ceb0..a99f3882092c2c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -736,14 +736,16 @@ class VPLiveOut : public VPUser { /// Struct to hold various analysis needed for cost computations. 
struct VPCostContext { const TargetTransformInfo &TTI; + const TargetLibraryInfo &TLI; VPTypeAnalysis Types; LLVMContext &LLVMCtx; LoopVectorizationCostModel &CM; SmallPtrSet SkipCostComputation; - VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy, - LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM) - : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {} + VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, + Type *CanIVTy, LLVMContext &LLVMCtx, + LoopVectorizationCostModel &CM) + : TTI(TTI), TLI(TLI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {} /// Return the cost for \p UI with \p VF using the legacy cost model as /// fallback until computing the cost of all recipes migrates to VPlan. @@ -796,7 +798,7 @@ class VPRecipeBase : public ilist_node_with_parent, /// Return the cost of this recipe, taking into account if the cost /// computation should be skipped and the ForceTargetInstructionCost flag. /// Also takes care of printing the cost for debugging. - virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx); + InstructionCost cost(ElementCount VF, VPCostContext &Ctx); /// Insert an unlinked recipe into a basic block immediately before /// the specified recipe. @@ -860,9 +862,11 @@ class VPRecipeBase : public ilist_node_with_parent, DebugLoc getDebugLoc() const { return DL; } protected: - /// Compute the cost of this recipe using the legacy cost model and the - /// underlying instructions. - InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const; + /// Compute the cost of this recipe either using a recipe's specialized + /// implementation or using the legacy cost model and the underlying + /// instructions. + virtual InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const; }; // Helper macro to define common classof implementations for recipes. @@ -1426,6 +1430,10 @@ class VPWidenRecipe : public VPRecipeWithIRFlags { /// processing State.VF elements. 
void execute(VPTransformState &State) override; + /// Return the cost of this VPWidenRecipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + unsigned getOpcode() const { return Opcode; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index bc57ea4d52471e..c9d603612aecea 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1140,6 +1140,85 @@ void VPWidenRecipe::execute(VPTransformState &State) { #endif } +/// Return the cost of this recipe's opcode at vectorization factor \p VF. Most +/// opcodes query TTI directly; integer division and remainder still defer to +/// the legacy cost model through \p Ctx. +InstructionCost VPWidenRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + switch (Opcode) { + case Instruction::FNeg: { + Type *VectorTy = + ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF); + return Ctx.TTI.getArithmeticInstrCost( + Opcode, VectorTy, CostKind, + {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, + {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}); + } + + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::URem: + // More complex computation, let the legacy cost-model handle this for now. + return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF); + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + VPValue *RHS = getOperand(1); + // Certain instructions can be cheaper to vectorize if they have a constant + // second vector operand. One example of this are shifts on x86. 
+ TargetTransformInfo::OperandValueInfo RHSInfo = { + TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}; + if (RHS->isLiveIn()) + RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue()); + + // If nothing more specific is known about the second operand and it is + // defined outside the vector regions, cost it as a uniform value. + if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue && + RHS->isDefinedOutsideVectorRegions()) + RHSInfo.Kind = TargetTransformInfo::OK_UniformValue; + Type *VectorTy = + ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF); + Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue()); + + SmallVector<const Value *, 4> Operands; + if (CtxI) + Operands.append(CtxI->value_op_begin(), CtxI->value_op_end()); + return Ctx.TTI.getArithmeticInstrCost( + Opcode, VectorTy, CostKind, + {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, + RHSInfo, Operands, CtxI, &Ctx.TLI); + } + case Instruction::Freeze: { + // This opcode is unknown. Assume that it is the same as 'mul'. + Type *VectorTy = + ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF); + return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue()); + Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); + return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(), + CostKind, CtxI); + } + default: + llvm_unreachable("Unsupported opcode for instruction"); + } +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {