Skip to content
This repository has been archived by the owner on Jan 20, 2024. It is now read-only.

Commit

Permalink
Push index fir.alloca inside of omp.wsloop if only used there
Browse files Browse the repository at this point in the history
  • Loading branch information
skatrak committed Sep 13, 2023
1 parent 6f23091 commit 59208ae
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 18 deletions.
36 changes: 26 additions & 10 deletions flang/lib/Optimizer/Transforms/OMPWsLoopIndexMemToReg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,32 +62,48 @@ class OMPWsLoopIndexMemToRegPass
llvm::ArrayRef<Value> argStores) {
llvm::SmallPtrSet<Operation *, 4> toDelete;
for (Value store : argStores) {
Operation *allocaOp = store.getDefiningOp();

// Skip argument if storage not defined by a fir.alloca.
if (!isa_and_nonnull<fir::AllocaOp>(store.getDefiningOp()))
if (!isa_and_nonnull<fir::AllocaOp>(allocaOp))
return;

// Check that uses of the pointer are all fir.load and fir.store
// inside of the omp.wsloop currently being visited.
bool patternApplicable = true;
bool allUsesInsideWsLoop = true, patternApplicable = true;
for (OpOperand &use : store.getUses()) {
Operation *owner = use.getOwner();
if (owner->getParentOfType<omp::WsLoopOp>() !=
loop.getOperation() ||
(!isa<fir::LoadOp>(owner) && !isa<fir::StoreOp>(owner))) {
bool insideWsLoop =
owner->getParentOfType<omp::WsLoopOp>() == loop.getOperation();
if (!insideWsLoop) {
allUsesInsideWsLoop = false;
patternApplicable = false;
break;
}
if (!isa<fir::LoadOp>(owner) && !isa<fir::StoreOp>(owner))
patternApplicable = false;
}

// Do not make any modifications if some uses of the pointer are
// outside of the omp.wsloop.
// Push fir.alloca into the beginning of the loop region.
if (allUsesInsideWsLoop) {
OpBuilder builder(loop.getRegion());
Operation *allocaClone = builder.clone(*allocaOp);
allocaOp->replaceAllUsesWith(allocaClone);
allocaOp->erase();
allocaOp = allocaClone;
}

// Do not make any further modifications if an address to the index
// is necessary. Otherwise, the values can be used directly from the
// loop region first block's arguments.
if (!patternApplicable)
return;

// Remove fir.store operations for that address and replace all
// fir.load operations with the index as returned by the omp.wsloop
// operation.
for (OpOperand &use : store.getUses()) {
for (OpOperand &use :
cast<fir::AllocaOp>(allocaOp).getResult().getUses()) {
Operation *owner = use.getOwner();
if (isa<fir::StoreOp>(owner))
toDelete.insert(owner);
Expand All @@ -96,8 +112,8 @@ class OMPWsLoopIndexMemToRegPass
}

// Delete now-unused fir.alloca.
toDelete.insert(store.getDefiningOp());
store.dropAllUses();
toDelete.insert(allocaOp);
allocaOp->dropAllUses();
}

// Only consider marked operations if all fir.{load,store,alloca}
Expand Down
8 changes: 4 additions & 4 deletions flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ subroutine omp_do_firstprivate(a)
n = a+1
!$omp parallel do firstprivate(a)
! CHECK: omp.parallel {
! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned
! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref<i32>
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK-NEXT: fir.store %[[ARG1]] to %[[REF]] : !fir.ref<i32>
! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
! CHECK-NEXT: omp.yield
Expand All @@ -36,19 +36,19 @@ subroutine omp_do_firstprivate2(a, n)
n = a+1
!$omp parallel do firstprivate(a, n)
! CHECK: omp.parallel {
! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned
! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref<i32>
! CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "n", pinned
! CHECK-NEXT: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
! CHECK-NEXT: fir.store %[[LD1]] to %[[CLONE1]] : !fir.ref<i32>


! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK-NEXT: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK-NEXT: fir.store %[[ARG2]] to %[[REF]] : !fir.ref<i32>
! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
! CHECK-NEXT: omp.yield
Expand Down
8 changes: 4 additions & 4 deletions flang/test/Lower/OpenMP/parallel-wsloop.f90
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ end subroutine parallel_private_do
! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}) {
! CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_private_doEi"}
! CHECK: omp.parallel {
! CHECK: %[[I_PRIV:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_private_doEcond"}
! CHECK: %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_private_doEnt"}
! CHECK: %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
Expand All @@ -133,6 +132,7 @@ end subroutine parallel_private_do
! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
! CHECK: %[[I_PRIV:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: fir.store %[[I]] to %[[I_PRIV]] : !fir.ref<i32>
! CHECK: fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
! CHECK: omp.yield
Expand Down Expand Up @@ -164,7 +164,6 @@ end subroutine omp_parallel_multiple_firstprivate_do
! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref<i32> {fir.bindc_name = "b"}) {
! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_multiple_firstprivate_doEi"}
! CHECK: omp.parallel {
! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_multiple_firstprivate_doEa"}
! CHECK: %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref<i32>
! CHECK: fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref<i32>
Expand All @@ -175,6 +174,7 @@ end subroutine omp_parallel_multiple_firstprivate_do
! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
! CHECK: omp.yield
Expand Down Expand Up @@ -208,7 +208,6 @@ end subroutine parallel_do_private
! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}) {
! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_do_privateEi"}
! CHECK: omp.parallel {
! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_privateEcond"}
! CHECK: %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_privateEnt"}
! CHECK: %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
Expand All @@ -217,6 +216,7 @@ end subroutine parallel_do_private
! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
! CHECK: fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
! CHECK: omp.yield
Expand Down Expand Up @@ -248,7 +248,6 @@ end subroutine omp_parallel_do_multiple_firstprivate
! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref<i32> {fir.bindc_name = "b"}) {
! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_do_multiple_firstprivateEi"}
! CHECK: omp.parallel {
! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"}
! CHECK: %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref<i32>
! CHECK: fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref<i32>
Expand All @@ -259,6 +258,7 @@ end subroutine omp_parallel_do_multiple_firstprivate
! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
! CHECK: omp.yield
Expand Down
59 changes: 59 additions & 0 deletions flang/test/Transforms/omp-wsloop-index.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// RUN: fir-opt --omp-wsloop-index-mem2reg %s | FileCheck %s

// Test case: the storage for the loop index (%1) is accessed only through
// fir.store / fir.load operations located inside the omp.wsloop. The expected
// output contains no fir.alloca inside omp.parallel or omp.wsloop, and the
// loop's block argument is stored directly into the outer result storage.
// CHECK-LABEL: @remove_alloca
func.func @remove_alloca() {
// CHECK: %[[RESULT:.*]] = fir.alloca i32
// CHECK-NEXT: omp.parallel
// %0 is defined outside the parallel region and is only written to by the
// loop body; the expectations above require it to remain in the output.
%0 = fir.alloca i32
omp.parallel {
// CHECK-NOT: fir.alloca
// CHECK-NEXT: arith.constant 1
// CHECK-NEXT: arith.constant 10
// CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
// %1 is the index storage under test; the expectations above require that
// no fir.alloca is left between omp.parallel and the loop bounds.
%1 = fir.alloca i32
%c1_i32 = arith.constant 1 : i32
%c10_i32 = arith.constant 10 : i32
omp.wsloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
// CHECK-NOT: fir.alloca
// CHECK-NEXT: fir.store %[[INDEX]] to %[[RESULT]]
// CHECK-NEXT: omp.yield
// Input pattern: the index is spilled to %1 and immediately reloaded. In the
// expected output both operations are gone and the index value itself is
// what gets stored to %0.
fir.store %arg0 to %1 : !fir.ref<i32>
%2 = fir.load %1 : !fir.ref<i32>
fir.store %2 to %0 : !fir.ref<i32>
omp.yield
}
omp.terminator
}
return
}

// External function declaration taking a reference to an i32 and returning an
// i32. It is called from @push_alloca with the loop index storage as argument,
// giving that storage a use that is neither a fir.load nor a fir.store.
func.func private @foo(%arg0 : !fir.ref<i32>) -> i32

// Test case: the storage for the loop index (%1) is used only inside the
// omp.wsloop, but one of its uses passes the address to a call (@foo), so the
// storage cannot be dropped. The expected output has the fir.alloca as the
// first operation inside the omp.wsloop body instead of inside omp.parallel.
// CHECK-LABEL: @push_alloca
func.func @push_alloca() {
// CHECK: %[[RESULT:.*]] = fir.alloca i32
// CHECK-NEXT: omp.parallel
// %0 is defined outside the parallel region and receives the value returned
// by the call; the expectations above require it to remain in the output.
%0 = fir.alloca i32
omp.parallel {
// CHECK-NOT: fir.alloca
// CHECK-NEXT: arith.constant 1
// CHECK-NEXT: arith.constant 10
// CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
// %1 is the index storage under test; in the expected output it no longer
// appears here, between omp.parallel and the loop bounds.
%1 = fir.alloca i32
%c1_i32 = arith.constant 1 : i32
%c10_i32 = arith.constant 10 : i32
omp.wsloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
// CHECK-NEXT: %[[ALLOCA:.*]] = fir.alloca i32
// CHECK-NEXT: fir.store %[[INDEX]] to %[[ALLOCA]]
// CHECK-NEXT: %[[RETURN:.*]] = func.call @foo(%[[ALLOCA]])
// CHECK-NEXT: fir.store %[[RETURN]] to %[[RESULT]]
// CHECK-NEXT: omp.yield
// The store of the index is kept in the expected output because the address
// is handed to @foo; only the location of the fir.alloca changes.
fir.store %arg0 to %1 : !fir.ref<i32>
%2 = func.call @foo(%1) : (!fir.ref<i32>) -> i32
fir.store %2 to %0 : !fir.ref<i32>
omp.yield
}
omp.terminator
}
return
}

0 comments on commit 59208ae

Please sign in to comment.