Skip to content

Commit

Permalink
[GPU] Use alloca for private memory allocations (#18540)
Browse files Browse the repository at this point in the history
Without this patch, some `memref.alloc` allocations that failed to be
optimized out remained as `malloc` in the final binary.

Fixes: #18534
  • Loading branch information
kuhar committed Sep 17, 2024
1 parent 740e301 commit 6a44005
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 8 deletions.
16 changes: 10 additions & 6 deletions compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,19 @@ static FailureOr<Value> gpuAllocationFn(OpBuilder &builder, Location loc,
if (!enclosingForall) {
enclosingForall = parent->getParentOfType<scf::ForallOp>();
}
gpu::AddressSpaceAttr addressSpace;
if (enclosingForall && hasThreadMapping(enclosingForall)) {
addressSpace = gpu::AddressSpaceAttr::get(
auto addressSpace = gpu::AddressSpaceAttr::get(
builder.getContext(), gpu::GPUDialect::getPrivateAddressSpace());
} else {
addressSpace = gpu::AddressSpaceAttr::get(
builder.getContext(), gpu::GPUDialect::getWorkgroupAddressSpace());
auto allocType =
MemRefType::get(memRefType.getShape(), memRefType.getElementType(),
AffineMap(), addressSpace);
return builder.create<memref::AllocaOp>(loc, allocType, dynamicSizes)
.getResult();
}
MemRefType allocType =

auto addressSpace = gpu::AddressSpaceAttr::get(
builder.getContext(), gpu::GPUDialect::getWorkgroupAddressSpace());
auto allocType =
MemRefType::get(memRefType.getShape(), memRefType.getElementType(),
AffineMap(), addressSpace);
return builder.create<memref::AllocOp>(loc, allocType, dynamicSizes)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,5 @@ hal.executable private @conv_nchw_dispatch_1 {
// eliminated.

// CHECK-LABEL: func @conv_2d_nchw_fchw_2x320x64x64x320x3x3_f16
// CHECK-COUNT-3: memref.alloc() : memref<1x1x1x4xf16, #gpu.address_space<private>>
// CHECK-COUNT-3: memref.alloca() : memref<1x1x1x4xf16, #gpu.address_space<private>>
// CHECK-COUNT-3: memref.copy %{{.*}}, %{{.*}} : memref<1x1x1x4xf16, #gpu.address_space<private>> to memref<{{.*}} #hal.descriptor_type<storage_buffer>>
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func.func @bufferize_with_thread_private_memory(%arg0: index) {
}
// CHECK-LABEL: func.func @bufferize_with_thread_private_memory
// CHECK: scf.forall {{.*}} in (2, 16) {
// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<1x1x4x4xf16, #gpu.address_space<private>>
// CHECK: %[[ALLOC:.+]] = memref.alloca() : memref<1x1x4x4xf16, #gpu.address_space<private>>
// CHECK: memref.copy %{{.*}}, %[[ALLOC]]
// CHECK-SAME: memref<1x1x4x4xf16, strided<[1310720, 4096, 64, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// CHECK-SAME: to memref<1x1x4x4xf16, #gpu.address_space<private>>
Expand Down

0 comments on commit 6a44005

Please sign in to comment.