Skip to content
This repository has been archived by the owner on Jan 20, 2024. It is now read-only.

[Flang][OpenMP][MLIR] Create lifetime markers for allocations only used within OpenMP loop regions #232

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions flang/test/Lower/OpenMP/loop-lifetime.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
! This test checks the insertion of lifetime information for loop indices of
! OpenMP loop operations.
! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm -fopenmp %s -o - | FileCheck %s

! Worksharing loop (!$omp do) driven by a default INTEGER index. The expected
! lifetime markers below cover 4 bytes of the alloca captured as I (intended to
! be the storage of the loop index `i`).
! CHECK-LABEL: define void @wsloop_i32
subroutine wsloop_i32()
! The allocas may appear in any order, hence the DAG form of the directives.
! CHECK-DAG: %[[LASTITER:.*]] = alloca i32
! CHECK-DAG: %[[LB:.*]] = alloca i32
! CHECK-DAG: %[[UB:.*]] = alloca i32
! CHECK-DAG: %[[STRIDE:.*]] = alloca i32
! CHECK-DAG: %[[I:.*]] = alloca i32
integer :: i

! Expected shape of the generated IR:
!   * lifetime.start is immediately followed by the branch into the loop body
!     block;
!   * no other block label shows up between the loop body label and the branch
!     to the continuation block;
!   * lifetime.end is the first instruction of the continuation block.
! CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %[[I]])
! CHECK-NEXT: br label %[[WSLOOP_BLOCK:.*]]
! CHECK: [[WSLOOP_BLOCK]]:
! CHECK-NOT: {{^.*}}:
! CHECK: br label %[[CONT_BLOCK:.*]]
! CHECK: [[CONT_BLOCK]]:
! CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %[[I]])
!$omp do
do i = 1, 10
print *, i
end do
!$omp end do
end subroutine

! Worksharing loop (!$omp do) driven by an INTEGER*8 index. Same expectations as
! the 32-bit variant, except that the bounds, stride and index allocas are i64
! and the lifetime markers cover 8 bytes.
! CHECK-LABEL: define void @wsloop_i64
subroutine wsloop_i64()
! The allocas may appear in any order, hence the DAG form of the directives.
! CHECK-DAG: %[[LASTITER:.*]] = alloca i32
! CHECK-DAG: %[[LB:.*]] = alloca i64
! CHECK-DAG: %[[UB:.*]] = alloca i64
! CHECK-DAG: %[[STRIDE:.*]] = alloca i64
! CHECK-DAG: %[[I:.*]] = alloca i64
integer*8 :: i

! lifetime.start must directly precede the branch into the loop body block, the
! body must reach the continuation branch without any other block label in
! between, and lifetime.end must open the continuation block.
! CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[I]])
! CHECK-NEXT: br label %[[WSLOOP_BLOCK:.*]]
! CHECK: [[WSLOOP_BLOCK]]:
! CHECK-NOT: {{^.*}}:
! CHECK: br label %[[CONT_BLOCK:.*]]
! CHECK: [[CONT_BLOCK]]:
! CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %[[I]])
!$omp do
do i = 1, 10
print *, i
end do
!$omp end do
end subroutine

! SIMD loop (!$omp simd) driven by a default INTEGER index. Only the alloca
! captured as I (intended to be the storage of `i`) is matched here, and the
! expected lifetime markers cover 4 bytes.
! CHECK-LABEL: define void @simdloop_i32
subroutine simdloop_i32()
! CHECK: %[[I:.*]] = alloca i32
integer :: i

! lifetime.start must directly precede the branch into the loop body block, the
! body must reach the continuation branch without any other block label in
! between, and lifetime.end must open the continuation block.
! CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %[[I]])
! CHECK-NEXT: br label %[[SIMDLOOP_BLOCK:.*]]
! CHECK: [[SIMDLOOP_BLOCK]]:
! CHECK-NOT: {{^.*}}:
! CHECK: br label %[[CONT_BLOCK:.*]]
! CHECK: [[CONT_BLOCK]]:
! CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %[[I]])
!$omp simd
do i=1, 9
print *, i
end do
!$omp end simd
end subroutine

! SIMD loop (!$omp simd) driven by an INTEGER*8 index. Same expectations as the
! 32-bit variant, except that the matched alloca is i64 and the lifetime markers
! cover 8 bytes.
! CHECK-LABEL: define void @simdloop_i64
subroutine simdloop_i64()
! CHECK: %[[I:.*]] = alloca i64
integer*8 :: i

! lifetime.start must directly precede the branch into the loop body block, the
! body must reach the continuation branch without any other block label in
! between, and lifetime.end must open the continuation block.
! CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[I]])
! CHECK-NEXT: br label %[[SIMDLOOP_BLOCK:.*]]
! CHECK: [[SIMDLOOP_BLOCK]]:
! CHECK-NOT: {{^.*}}:
! CHECK: br label %[[CONT_BLOCK:.*]]
! CHECK: [[CONT_BLOCK]]:
! CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %[[I]])
!$omp simd
do i=1, 9
print *, i
end do
!$omp end simd
end subroutine
39 changes: 0 additions & 39 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2755,45 +2755,6 @@ OpenMPIRBuilder::applyWorkshareLoopDevice(DebugLoc DL, CanonicalLoopInfo *CLI) {
// body
Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);

// Sink allocas used only inside loop body
// original code:
// %item_used_in_loop_body = alloca i32
// ;no more instructions which uses item_used_in_loop_body
// loopbody:
// use(%item_used_in_loop_body)
//
// After sinking:
// loopbody:
// %item_used_in_loop_body_moved_alloca = alloca i32
// use(%item_used_in_loop_body_moved_alloca)
//
// TODO: OMPIRBuilder should not be responsible for sinking allocas
// which are used only inside loop body region.
for (AllocaInst *AllocaItem : CEAC.getAllocas()) {
bool ReadyToMove = true;
for (User *AllocaUse : AllocaItem->users()) {
Instruction *Inst;
if ((Inst = dyn_cast<LoadInst>(AllocaUse)) &&
ParallelRegionBlockSet.count(Inst->getParent()))
continue;
if ((Inst = dyn_cast<StoreInst>(AllocaUse)) &&
ParallelRegionBlockSet.count(Inst->getParent()))
continue;
ReadyToMove = false;
break;
}
if (ReadyToMove) {
Builder.restoreIP({CLI->getBody(), CLI->getBody()->begin()});
AllocaInst *NewAlloca =
Builder.CreateAlloca(CLI->getIndVarType(), 0, "moved_alloca");
std::vector<User *> Users(AllocaItem->user_begin(),
AllocaItem->user_end());
for (User *use : Users) {
use->replaceUsesOfWith(AllocaItem, NewAlloca);
}
ToBeDeleted.push_back(AllocaItem);
}
}
// We need to model loop body region as the function f(cnt, loop_arg).
// That's why we replace loop induction variable by the new counter
// which will be one of loop body function argument
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,50 @@ static void collectReductionInfo(
}
}

/// Collect into \p allocasToSink the already-translated LLVM alloca
/// instructions whose llvm.alloca operation is defined outside of the operation
/// owning \p region, but whose address is used exclusively by operations nested
/// under that owning operation. Candidates are discovered through the
/// llvm.store operations enumerated by `region.getOps`, and allocations of
/// non-primitive types (primitive size of zero bits) are left out.
static void getSinkableAllocas(LLVM::ModuleTranslation &moduleTranslation,
                               Region &region,
                               SetVector<llvm::AllocaInst *> &allocasToSink) {
  Operation *parentOp = region.getParentOp();

  // Returns true when every user of `addr` is nested under `parentOp`.
  auto hasOnlyInternalUses = [parentOp](Value addr) {
    for (OpOperand &use : addr.getUses())
      if (!parentOp->isAncestor(use.getOwner()))
        return false;
    return true;
  };

  for (LLVM::StoreOp store : region.getOps<LLVM::StoreOp>()) {
    Value addr = store.getAddr();
    Operation *definingOp = addr.getDefiningOp();

    // Candidates are addresses produced by an llvm.alloca operation that lives
    // outside of the parent operation; anything else is ignored.
    if (!isa_and_present<LLVM::AllocaOp>(definingOp) ||
        parentOp->isAncestor(definingOp))
      continue;

    // The MLIR address must already be mapped to an LLVM alloca instruction.
    auto *llvmAlloca = dyn_cast_if_present<llvm::AllocaInst>(
        moduleTranslation.lookupValue(addr));
    if (!llvmAlloca)
      continue;

    // Types without a primitive size are not handled by this function.
    if (llvmAlloca->getAllocatedType()->getPrimitiveSizeInBits() == 0)
      continue;

    // Finally, the address must not escape the parent operation.
    if (hasOnlyInternalUses(addr))
      allocasToSink.insert(llvmAlloca);
  }
}

/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
Expand Down Expand Up @@ -850,6 +894,9 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
// Set up the source location value for OpenMP runtime.
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

SetVector<llvm::AllocaInst *> allocasToSink;
getSinkableAllocas(moduleTranslation, loop.getRegion(), allocasToSink);

// Generator of the canonical loop body.
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
// relying on captured variables.
Expand All @@ -869,10 +916,21 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
if (loopInfos.size() != loop.getNumLoops() - 1)
return;

// Convert the body of the loop.
// Convert the body of the loop, adding lifetime markers to allocations that
// can be sunk into the new block.
builder.restoreIP(ip);
convertOmpOpRegions(loop.getRegion(), "omp.wsloop.region", builder,
moduleTranslation, bodyGenStatus);
for (auto *alloca : allocasToSink) {
unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8;
builder.CreateLifetimeStart(alloca, builder.getInt64(size));
}
llvm::BasicBlock *cont =
convertOmpOpRegions(loop.getRegion(), "omp.wsloop.region", builder,
moduleTranslation, bodyGenStatus);
builder.SetInsertPoint(cont, cont->begin());
for (auto *alloca : allocasToSink) {
unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8;
builder.CreateLifetimeEnd(alloca, builder.getInt64(size));
}
};

// Delegate actual loop construction to the OpenMP IRBuilder.
Expand Down Expand Up @@ -1091,6 +1149,9 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,

llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

SetVector<llvm::AllocaInst *> allocasToSink;
getSinkableAllocas(moduleTranslation, loop.getRegion(), allocasToSink);

// Generator of the canonical loop body.
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
// relying on captured variables.
Expand All @@ -1110,10 +1171,21 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
if (loopInfos.size() != loop.getNumLoops() - 1)
return;

// Convert the body of the loop.
// Convert the body of the loop, adding lifetime markers to allocations that
// can be sunk into the new block.
builder.restoreIP(ip);
convertOmpOpRegions(loop.getRegion(), "omp.simdloop.region", builder,
moduleTranslation, bodyGenStatus);
for (auto *alloca : allocasToSink) {
unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8;
builder.CreateLifetimeStart(alloca, builder.getInt64(size));
}
llvm::BasicBlock *cont =
convertOmpOpRegions(loop.getRegion(), "omp.simdloop.region", builder,
moduleTranslation, bodyGenStatus);
builder.SetInsertPoint(cont, cont->begin());
for (auto *alloca : allocasToSink) {
unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8;
builder.CreateLifetimeEnd(alloca, builder.getInt64(size));
}
};

// Delegate actual loop construction to the OpenMP IRBuilder.
Expand Down