Skip to content

Commit

Permalink
[RFC][LV] VPlan-based cost model
Browse files Browse the repository at this point in the history
This patch follows D89322 to add an initial skeleton of vplan-based cost model.

The difference is that, instead of adding a cost() interface to the VPRecipes,
all cost implementations are put together in VPlanCostModel.

This allows VPlanCostModel to concentrate on assigning costs to vplan,
thus separating the cost model code from the vplan IR, similar to LLVM IR cost
modeling.

During the transition, it will still use the legacy model to obtain costs until
the cost calculations for all recipes are implemented.

Please let me know if you agree with the main idea of this patch.
If there is a general consensus, I'll proceed to implement the cost for the
other recipes for review.

Differential Revision: https://reviews.llvm.org/D158716

- Address comments
- Move VPCM object outside of the loop
- Add getElementType() and getReturnElementType()
  • Loading branch information
arcbbb committed Sep 28, 2023
1 parent a0c0d43 commit edc764b
Show file tree
Hide file tree
Showing 5 changed files with 390 additions and 1 deletion.
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_llvm_component_library(LLVMVectorize
Vectorize.cpp
VectorCombine.cpp
VPlan.cpp
VPlanCostModel.cpp
VPlanHCFGBuilder.cpp
VPlanRecipes.cpp
VPlanSLP.cpp
Expand Down
30 changes: 29 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanCostModel.h"
#include "VPlanHCFGBuilder.h"
#include "VPlanTransforms.h"
#include "llvm/ADT/APInt.h"
Expand Down Expand Up @@ -363,6 +364,11 @@ cl::opt<bool> EnableVPlanNativePath(
"support for outer loop vectorization."));
}

// Hidden, off-by-default switch. When set, candidate VFs are costed through
// VPlanCostModel::expectedCost instead of the legacy cost model.
cl::opt<bool> CostUsingVPlan(
    "vplan-use-vplan-cost-model", cl::Hidden, cl::init(false),
    cl::desc("Enable VPlan based costing path. To "
             "become the default in the future."));

// This flag enables the stress testing of the VPlan H-CFG construction in the
// VPlan-native vectorization path. It must be used in conjunction with
// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
Expand Down Expand Up @@ -1171,6 +1177,8 @@ using VectorizationCostTy = std::pair<InstructionCost, bool>;
/// TargetTransformInfo to query the different backends for the cost of
/// different operations.
class LoopVectorizationCostModel {
friend class VPlanCostModel;

public:
LoopVectorizationCostModel(ScalarEpilogueLowering SEL, Loop *L,
PredicatedScalarEvolution &PSE, LoopInfo *LI,
Expand Down Expand Up @@ -8648,6 +8656,20 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
}

/// Narrow \p ValTy to the minimal bitwidth the legacy cost model recorded for
/// \p I, if there is one.
///
/// \returns an integer type (in the context of \p ValTy) of the recorded
/// width when \p I has an entry in the minimal-bitwidth map, otherwise
/// \p ValTy unchanged.
Type *VPlanCostModel::truncateToMinimalBitwidth(Type *ValTy,
                                                Instruction *I) const {
  // Bind by reference so the map is not copied on every query when the getter
  // hands out a reference (a by-value return is still lifetime-extended).
  const auto &MinBWs = CM.getMinimalBitwidths();
  // One find() replaces the previous contains() + operator[] double lookup.
  auto It = MinBWs.find(I);
  if (It == MinBWs.end())
    return ValTy;
  return IntegerType::get(ValTy->getContext(), It->second);
}

/// Cost \p I at vectorization factor \p VF with the legacy cost model and
/// return only the cost component of the (cost, flag) pair it produces.
InstructionCost VPlanCostModel::getLegacyInstructionCost(Instruction *I,
                                                         ElementCount VF) {
  return CM.getInstructionCost(I, VF).first;
}

void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
ElementCount MaxVF) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
Expand Down Expand Up @@ -8677,10 +8699,16 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
VF = SubRange.End;
}

VPlanCostModel VPCM(*TTI, PSE.getSE()->getContext(), CM);
for (const VPlanPtr &Plan : VPlans) {
SmallVector<VectorizationFactor> Costs;
for (ElementCount CostVF : Plan->getVFs()) {
auto [VecCost, IsVec] = CM.expectedCost(CostVF, &InvalidCosts);
VectorizationCostTy C;
if (CostUsingVPlan) {
C.first = VPCM.expectedCost(*Plan, CostVF, C.second);
} else
C = CM.expectedCost(CostVF, &InvalidCosts);
auto [VecCost, IsVec] = C;
#ifndef NDEBUG
unsigned AssumedMinimumVscale = 1;
if (std::optional<unsigned> VScale = getVScaleForTuning())
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,11 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
}

bool hasUnderlyingInstr() const {
return getNumDefinedValues() == 1 &&
getVPSingleValue()->getUnderlyingValue() != nullptr;
}

/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPDef *D) {
// All VPDefs are also VPRecipeBases.
Expand Down
284 changes: 284 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanCostModel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
//===- VPlanCostModel.cpp - VPlan-based Vectorizer Cost Model -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// VPlan-based cost model
///
//===----------------------------------------------------------------------===//

#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"

#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanCostModel.h"
#include "VPlanValue.h"

using namespace llvm;

#define DEBUG_TYPE "vplan-cost-model"

namespace llvm {
/// Sum the cost, at vectorization factor \p VF, of every block reached by a
/// deep depth-first walk starting at the entry of \p Plan. \p IsVec is passed
/// through to the per-block costing, which may set it.
InstructionCost VPlanCostModel::expectedCost(const VPlan &Plan, ElementCount VF,
                                             bool &IsVec) {
  InstructionCost PlanCost = 0;
  for (const VPBlockBase *VPB : vp_depth_first_deep(Plan.getEntry())) {
    InstructionCost BlockCost = getCost(VPB, VF, IsVec);
    PlanCost += BlockCost;
  }
  return PlanCost;
}

/// Cost a single VPlan block at vectorization factor \p VF.
///
/// A VPBasicBlock costs the sum of its recipes; any other kind of block
/// contributes 0 here. \p IsVec is forwarded to the per-recipe costing.
InstructionCost VPlanCostModel::getCost(const VPBlockBase *Block,
                                        ElementCount VF, bool &IsVec) {
  const auto *VPBB = dyn_cast<VPBasicBlock>(Block);
  if (!VPBB)
    return 0;

  InstructionCost BlockCost = 0;
  for (const VPRecipeBase &R : *VPBB)
    BlockCost += getCost(&R, VF, IsVec);
  return BlockCost;
}

/// Cost a single recipe at vectorization factor \p VF.
///
/// VPInstructions and widened memory recipes are costed directly through TTI.
/// Every other recipe is costed by handing its underlying IR instruction to
/// the legacy cost model; a recipe without one is reported under LLVM_DEBUG
/// and costed as 0.
///
/// \param IsVec set to true when the recipe is costed as a vector operation;
/// never cleared here.
/// \returns the cost assigned to \p Recipe.
InstructionCost VPlanCostModel::getCost(const VPRecipeBase *Recipe,
                                        ElementCount VF, bool &IsVec) {
  auto *ScCondTy = Type::getInt1Ty(Context);
  auto *VecCondTy = VectorType::get(ScCondTy, VF);
  InstructionCost Cost =
      TypeSwitch<const VPRecipeBase *, InstructionCost>(Recipe)
          .Case<VPInstruction>([&](const VPInstruction *VPI)
                                   -> InstructionCost {
            unsigned Opcode = VPI->getOpcode();
            if (Instruction::isBinaryOp(Opcode)) {
              // Operands: A, B
              IsVec = true;
              Type *VectorTy = VectorType::get(getReturnElementType(VPI), VF);
              return TTI.getArithmeticInstrCost(Opcode, VectorTy, CostKind);
            }
            switch (Opcode) {
            case VPInstruction::Not: {
              // Operands: A
              // Costed as an xor on the operand's vector type.
              IsVec = true;
              Type *VectorTy = VectorType::get(getElementType(VPI, 0), VF);
              return TTI.getArithmeticInstrCost(Instruction::Xor, VectorTy,
                                                CostKind);
            }
            case VPInstruction::ICmpULE: {
              // Operands: IV, TripCount
              IsVec = true;
              Type *VectorTy = VectorType::get(getElementType(VPI, 0), VF);
              return TTI.getCmpSelInstrCost(Instruction::ICmp, VectorTy,
                                            VecCondTy, CmpInst::ICMP_ULE,
                                            CostKind);
            }
            case Instruction::Select: {
              // Operands: Cond, Op1, Op2
              IsVec = true;
              Type *VectorTy = VectorType::get(getReturnElementType(VPI), VF);
              return TTI.getCmpSelInstrCost(
                  Instruction::Select, VectorTy, VecCondTy,
                  CmpInst::BAD_ICMP_PREDICATE, CostKind);
            }
            case VPInstruction::ActiveLaneMask: {
              // Operands: IV, TripCount
              IsVec = true;
              Type *OpTy = Type::getIntNTy(
                  Context, getElementType(VPI, 0)->getScalarSizeInBits());
              IntrinsicCostAttributes ICA(Intrinsic::get_active_lane_mask,
                                          VecCondTy, {OpTy, OpTy});
              return TTI.getIntrinsicInstrCost(ICA, CostKind);
            }
            case VPInstruction::FirstOrderRecurrenceSplice: {
              // Operands: FOR, FOR.backedge
              IsVec = true;
              Type *VectorTy = VectorType::get(getReturnElementType(VPI), VF);
              // Mask is VF-1, VF, ..., 2*VF-2.
              SmallVector<int> Mask(VF.getKnownMinValue());
              std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
              return TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
                                        cast<VectorType>(VectorTy), Mask,
                                        CostKind, VF.getKnownMinValue() - 1);
            }
            case VPInstruction::CalculateTripCountMinusVF: {
              // Operands: TripCount
              // Costed as a scalar sub + icmp ugt + select.
              Type *ScalarTy = getReturnElementType(VPI);
              return TTI.getArithmeticInstrCost(Instruction::Sub, ScalarTy,
                                                CostKind) +
                     TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
                                            ScCondTy, CmpInst::ICMP_UGT,
                                            CostKind) +
                     TTI.getCmpSelInstrCost(
                         Instruction::Select, ScalarTy, ScCondTy,
                         CmpInst::BAD_ICMP_PREDICATE, CostKind);
            }
            case VPInstruction::CanonicalIVIncrement:
            case VPInstruction::CanonicalIVIncrementNUW:
            // Operands: StartV
            case VPInstruction::CanonicalIVIncrementForPart:
            case VPInstruction::CanonicalIVIncrementForPartNUW: {
              // Operands: IVPhi, CanonicalIVIncrement
              // All four increments are costed as one scalar add.
              Type *ScalarTy = getReturnElementType(VPI);
              return TTI.getArithmeticInstrCost(Instruction::Add, ScalarTy,
                                                CostKind);
            }
            case VPInstruction::BranchOnCond:
              // Operands: Cond
              // The condition is an operand, so it is costed where it is
              // produced; only the branch itself is charged here.
              return TTI.getCFInstrCost(Instruction::Br, CostKind);
            case VPInstruction::BranchOnCount: {
              // Operands: IV, TripCount
              // Costed as a scalar icmp eq feeding the branch.
              Type *ScalarTy = getElementType(VPI, 0);
              return TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
                                            ScCondTy, CmpInst::ICMP_EQ,
                                            CostKind) +
                     TTI.getCFInstrCost(Instruction::Br, CostKind);
            }
            default:
              llvm_unreachable("Unsupported opcode for VPInstruction");
            } // end of switch
          })
          .Case<VPWidenMemoryInstructionRecipe>(
              [&](const VPWidenMemoryInstructionRecipe *VPWMIR) {
                IsVec = true;
                return getMemoryOpCost(VPWMIR, VF);
              })
          .Default([&](const VPRecipeBase *R) -> InstructionCost {
            if (!R->hasUnderlyingInstr()) {
              LLVM_DEBUG(
                  dbgs() << "VPlanCM: unsupported recipe ";
                  VPSlotTracker SlotTracker((R->getParent())
                                                ? R->getParent()->getPlan()
                                                : nullptr);
                  R->print(dbgs(), Twine(), SlotTracker); dbgs() << '\n');
              return 0;
            }
            // Transitional path: defer to the legacy per-instruction cost.
            Instruction *I = const_cast<Instruction *>(R->getUnderlyingInstr());
            return getLegacyInstructionCost(I, VF);
          });

  LLVM_DEBUG(dbgs() << "VPlanCM: cost " << Cost << " for VF " << VF
                    << " for recipe: ";
             VPSlotTracker SlotTracker((Recipe->getParent())
                                           ? Recipe->getParent()->getPlan()
                                           : nullptr);
             Recipe->print(dbgs(), Twine(), SlotTracker); dbgs() << '\n');
  return Cost;
}

/// Cost the load/store \p I when widened to type \p Ty.
///
/// A non-consecutive access is costed as address computation plus a
/// gather/scatter. A consecutive access is costed as a masked or plain memory
/// operation (per \p IsMasked), plus a reverse shuffle when \p IsReverse.
InstructionCost VPlanCostModel::getMemoryOpCost(const Instruction *I, Type *Ty,
                                                bool IsConsecutive,
                                                bool IsMasked, bool IsReverse) {
  // The alignment and address-space queries are called with a non-const
  // pointer, hence the cast.
  auto *MutableI = const_cast<Instruction *>(I);
  const unsigned Opcode = I->getOpcode();
  const Align Alignment = getLoadStoreAlignment(MutableI);
  const unsigned AS = getLoadStoreAddressSpace(MutableI);

  if (!IsConsecutive) {
    const Value *Ptr = getLoadStorePointerOperand(I);
    return TTI.getAddressComputationCost(Ty) +
           TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment,
                                      CostKind, I);
  }

  InstructionCost AccessCost =
      IsMasked
          ? TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, CostKind)
          : TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, CostKind,
                                TTI::getOperandInfo(I->getOperand(0)), I);
  if (!IsReverse)
    return AccessCost;

  return AccessCost + TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
                                         cast<VectorType>(Ty), std::nullopt,
                                         CostKind, 0);
}

/// Cost a widened memory recipe at vectorization factor \p VF by forwarding
/// its ingredient instruction, widened value type, and its
/// consecutive/masked/reverse properties to the instruction-level overload.
InstructionCost
VPlanCostModel::getMemoryOpCost(const VPWidenMemoryInstructionRecipe *VPWMIR,
                                ElementCount VF) {
  Type *ElemTy = getReturnElementType(VPWMIR);
  Type *WideTy = VectorType::get(ElemTy, VF);
  // The access is treated as masked whenever the recipe carries a mask.
  const bool HasMask = VPWMIR->getMask() != nullptr;
  Instruction *Ingredient = &VPWMIR->getIngredient();

  return getMemoryOpCost(Ingredient, WideTy, VPWMIR->isConsecutive(), HasMask,
                         VPWMIR->isReverse());
}

// Return the element type of operand \p N of \p Recipe. Only element types are
// computed here since VF is not carried in VPlan.
//
// If the operand has an underlying IR value, its type is used (narrowed to
// the recorded minimal bitwidth when that value is an Instruction). Otherwise
// the type is derived from the recipe that defines the operand.
Type *VPlanCostModel::getElementType(const VPRecipeBase *Recipe,
                                     unsigned N) const {
  auto *Operand = Recipe->getOperand(N);

  if (Value *IRVal = Operand->getUnderlyingValue()) {
    Type *Ty = IRVal->getType();
    if (auto *Inst = dyn_cast<Instruction>(IRVal))
      return truncateToMinimalBitwidth(Ty, Inst);
    return Ty;
  }

  assert(Operand->hasDefiningRecipe() &&
         "VPValue has no live-in and defining recipe");
  return getReturnElementType(Operand->getDefiningRecipe());
}

/// Return the element type of the value produced by \p Recipe.
///
/// Handles widened memory recipes (the accessed type, narrowed to the
/// recorded minimal bitwidth) and VPInstructions (i1 for mask-producing
/// opcodes, otherwise the element type of the relevant operand). Branch
/// opcodes, unknown opcodes and all other recipe kinds are unreachable.
Type *VPlanCostModel::getReturnElementType(const VPRecipeBase *Recipe) const {
  if (const auto *MemR = dyn_cast<VPWidenMemoryInstructionRecipe>(Recipe)) {
    Instruction *I = &MemR->getIngredient();
    return truncateToMinimalBitwidth(getLoadStoreType(I), I);
  }

  const auto *VPI = dyn_cast<VPInstruction>(Recipe);
  if (!VPI)
    llvm_unreachable("Unsupported VPRecipe");

  const unsigned Opcode = VPI->getOpcode();
  // Binary ops (operands: A, B) take the type of their first operand.
  if (Instruction::isBinaryOp(Opcode))
    return getElementType(VPI, 0);

  switch (Opcode) {
  // Mask-producing opcodes.
  case VPInstruction::Not:            // Operands: A
  case VPInstruction::ICmpULE:        // Operands: IV, TripCount
  case VPInstruction::ActiveLaneMask: // Operands: IV, TripCount
    return Type::getInt1Ty(Context);

  // Operands: Cond, Op1, Op2 -- typed by the first selected value.
  case Instruction::Select:
    return getElementType(VPI, 1);

  // Opcodes typed by their first operand.
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::CanonicalIVIncrementForPartNUW:
    return getElementType(VPI, 0);

  // Branches produce no value.
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
    llvm_unreachable("Operation doesn't have return type");

  default:
    llvm_unreachable("Unsupported opcode for VPInstruction");
  }
}

} // namespace llvm
Loading

0 comments on commit edc764b

Please sign in to comment.