Skip to content

Commit

Permalink
[ARM] Add target feature to force 32-bit atomics
Browse files Browse the repository at this point in the history
This adds a +atomics-32 target feature, which instructs LLVM to assume
that lock-free 32-bit atomics are available for this target, even
if they usually wouldn't be.

If only atomic loads/stores are used, then this won't emit libcalls.
If atomic CAS is used, then the user is responsible for providing
any necessary __sync implementations (e.g. by masking interrupts
for single-core privileged use cases).

See https://reviews.llvm.org/D120026#3674333 for context on this
change. The tl;dr is that the thumbv6m target in Rust has
historically made only atomic load/store available, which is
incompatible with the change from D120026, which switched these
operations to libatomic calls.

Differential Revision: https://reviews.llvm.org/D130480
  • Loading branch information
nikic committed Jul 27, 2022
1 parent 9cc1dd2 commit b1b1086
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 1 deletion.
9 changes: 9 additions & 0 deletions llvm/lib/Target/ARM/ARM.td
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,15 @@ def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf",
"for leaf functions",
[FeatureAAPCSFrameChain]>;

// Force the backend to treat 32-bit atomics as lock-free, regardless of what
// the target and operating system combination would normally offer. Any
// __sync implementations that turn out to be needed must be supplied by the
// user. Code compiled with this feature and code compiled without it are not
// ABI-compatible when atomic variables are exposed across the ABI boundary.
def FeatureAtomics32 : SubtargetFeature<"atomics-32",
                                        "HasForced32BitAtomics",
                                        "true",
                                        "Assume that lock-free 32-bit atomics are available">;

//===----------------------------------------------------------------------===//
// ARM architecture class
//
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1370,7 +1370,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// instructions. (ARMv6 doesn't have dmb, but it has an equivalent
// encoding; see ARMISD::MEMBARRIER_MCR.)
setMaxAtomicSizeInBitsSupported(64);
} else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) {
} else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
Subtarget->hasForced32BitAtomics()) {
// Cortex-M (besides Cortex-M0) has 32-bit atomics, and +atomics-32 forces
// the same 32-bit limit for targets that would not otherwise get it.
setMaxAtomicSizeInBitsSupported(32);
} else {
Expand Down
203 changes: 203 additions & 0 deletions llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefixes=CHECK,NO-ATOMIC32
; RUN: llc -mtriple=thumbv6m-none-eabi -mattr=+atomics-32 < %s | FileCheck %s --check-prefixes=CHECK,ATOMIC32

; Sequentially consistent atomic load of an i32.
; The default run expects a libcall to __atomic_load_4; the +atomics-32 run
; expects an inline ldr followed by a dmb sy barrier and no call at all.
define i32 @load32(ptr %p) {
; NO-ATOMIC32-LABEL: load32:
; NO-ATOMIC32: @ %bb.0:
; NO-ATOMIC32-NEXT: .save {r7, lr}
; NO-ATOMIC32-NEXT: push {r7, lr}
; NO-ATOMIC32-NEXT: movs r1, #5
; NO-ATOMIC32-NEXT: bl __atomic_load_4
; NO-ATOMIC32-NEXT: pop {r7, pc}
;
; ATOMIC32-LABEL: load32:
; ATOMIC32: @ %bb.0:
; ATOMIC32-NEXT: ldr r0, [r0]
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: bx lr
%v = load atomic i32, ptr %p seq_cst, align 4
ret i32 %v
}

; Sequentially consistent atomic store of the i32 constant 0.
; The default run expects a libcall to __atomic_store_4; the +atomics-32 run
; expects an inline str bracketed by dmb sy barriers and no call at all.
define void @store32(ptr %p) {
; NO-ATOMIC32-LABEL: store32:
; NO-ATOMIC32: @ %bb.0:
; NO-ATOMIC32-NEXT: .save {r7, lr}
; NO-ATOMIC32-NEXT: push {r7, lr}
; NO-ATOMIC32-NEXT: movs r1, #0
; NO-ATOMIC32-NEXT: movs r2, #5
; NO-ATOMIC32-NEXT: bl __atomic_store_4
; NO-ATOMIC32-NEXT: pop {r7, pc}
;
; ATOMIC32-LABEL: store32:
; ATOMIC32: @ %bb.0:
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: movs r1, #0
; ATOMIC32-NEXT: str r1, [r0]
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: bx lr
store atomic i32 0, ptr %p seq_cst, align 4
ret void
}

; Sequentially consistent atomic add of 1 to an i32, returning the old value.
; The default run expects a libcall to __atomic_fetch_add_4; the +atomics-32
; run expects a call to __sync_fetch_and_add_4 bracketed by dmb sy barriers.
; The +atomics-32 output therefore still contains a call for
; read-modify-write operations.
define i32 @rmw32(ptr %p) {
; NO-ATOMIC32-LABEL: rmw32:
; NO-ATOMIC32: @ %bb.0:
; NO-ATOMIC32-NEXT: .save {r7, lr}
; NO-ATOMIC32-NEXT: push {r7, lr}
; NO-ATOMIC32-NEXT: movs r1, #1
; NO-ATOMIC32-NEXT: movs r2, #5
; NO-ATOMIC32-NEXT: bl __atomic_fetch_add_4
; NO-ATOMIC32-NEXT: pop {r7, pc}
;
; ATOMIC32-LABEL: rmw32:
; ATOMIC32: @ %bb.0:
; ATOMIC32-NEXT: .save {r7, lr}
; ATOMIC32-NEXT: push {r7, lr}
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: movs r1, #1
; ATOMIC32-NEXT: bl __sync_fetch_and_add_4
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: pop {r7, pc}
%v = atomicrmw add ptr %p, i32 1 seq_cst, align 4
ret i32 %v
}

; Sequentially consistent i32 compare-and-swap (expected 0, new value 1) that
; returns the value loaded from memory.
; The default run expects a libcall to __atomic_compare_exchange_4, with the
; expected value spilled to the stack and reloaded as the result; the
; +atomics-32 run expects a call to __sync_val_compare_and_swap_4 bracketed by
; dmb sy barriers.
define i32 @cmpxchg32(ptr %p) {
; NO-ATOMIC32-LABEL: cmpxchg32:
; NO-ATOMIC32: @ %bb.0:
; NO-ATOMIC32-NEXT: .save {r7, lr}
; NO-ATOMIC32-NEXT: push {r7, lr}
; NO-ATOMIC32-NEXT: .pad #8
; NO-ATOMIC32-NEXT: sub sp, #8
; NO-ATOMIC32-NEXT: movs r1, #0
; NO-ATOMIC32-NEXT: str r1, [sp, #4]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
; NO-ATOMIC32-NEXT: add r1, sp, #4
; NO-ATOMIC32-NEXT: movs r2, #1
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_4
; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
; NO-ATOMIC32-NEXT: add sp, #8
; NO-ATOMIC32-NEXT: pop {r7, pc}
;
; ATOMIC32-LABEL: cmpxchg32:
; ATOMIC32: @ %bb.0:
; ATOMIC32-NEXT: .save {r7, lr}
; ATOMIC32-NEXT: push {r7, lr}
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: movs r1, #0
; ATOMIC32-NEXT: movs r2, #1
; ATOMIC32-NEXT: bl __sync_val_compare_and_swap_4
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: pop {r7, pc}
%res = cmpxchg ptr %p, i32 0, i32 1 seq_cst seq_cst
%res.0 = extractvalue { i32, i1 } %res, 0
ret i32 %res.0
}

; Sequentially consistent atomic load of an i64.
; The assertions use the prefix shared by both runs, so the same output is
; expected with and without +atomics-32: a call to __atomic_load with the
; result read back from a stack slot.
; NOTE(review): align 4 is below the size of i64; presumably that is why the
; unsized __atomic_load is expected rather than a sized variant - confirm.
define i64 @load64(ptr %p) {
; CHECK-LABEL: load64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: movs r0, #8
; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: movs r3, #5
; CHECK-NEXT: bl __atomic_load
; CHECK-NEXT: ldr r1, [sp, #4]
; CHECK-NEXT: ldr r0, [sp]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
%v = load atomic i64, ptr %p seq_cst, align 4
ret i64 %v
}

; Sequentially consistent atomic store of the i64 constant 0.
; The assertions use the prefix shared by both runs, so the same output is
; expected with and without +atomics-32: the value is written to a stack slot
; and passed to a call to __atomic_store.
; NOTE(review): align 4 is below the size of i64; presumably that is why the
; unsized __atomic_store is expected rather than a sized variant - confirm.
define void @store64(ptr %p) {
; CHECK-LABEL: store64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: str r0, [sp, #4]
; CHECK-NEXT: str r0, [sp]
; CHECK-NEXT: movs r0, #8
; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: movs r3, #5
; CHECK-NEXT: bl __atomic_store
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
store atomic i64 0, ptr %p seq_cst, align 4
ret void
}

; Sequentially consistent atomic add of 1 to an i64, returning the old value.
; The assertions use the prefix shared by both runs, so the same output is
; expected with and without +atomics-32. The expected code is a retry loop:
; the current value is loaded with two plain ldr instructions, incremented
; with adds/adcs, and handed to __atomic_compare_exchange; the loop branches
; back to .LBB6_1 while the call returns 0.
define i64 @rmw64(ptr %p) {
; CHECK-LABEL: rmw64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .pad #24
; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: ldr r1, [r4, #4]
; CHECK-NEXT: .LBB6_1: @ %atomicrmw.start
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str r0, [sp, #16]
; CHECK-NEXT: str r1, [sp, #20]
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: adds r0, r0, #1
; CHECK-NEXT: adcs r2, r1
; CHECK-NEXT: str r2, [sp, #12]
; CHECK-NEXT: str r0, [sp, #8]
; CHECK-NEXT: movs r0, #5
; CHECK-NEXT: str r0, [sp]
; CHECK-NEXT: str r0, [sp, #4]
; CHECK-NEXT: movs r0, #8
; CHECK-NEXT: add r2, sp, #16
; CHECK-NEXT: add r3, sp, #8
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __atomic_compare_exchange
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: ldr r1, [sp, #20]
; CHECK-NEXT: ldr r0, [sp, #16]
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: beq .LBB6_1
; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end
; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop {r4, pc}
%v = atomicrmw add ptr %p, i64 1 seq_cst, align 4
ret i64 %v
}

; Sequentially consistent i64 compare-and-swap (expected 0, new value 1) that
; returns the value loaded from memory.
; The assertions use the prefix shared by both runs, so the same output is
; expected with and without +atomics-32: a call to __atomic_compare_exchange_8
; with the expected value held in a stack slot that is reloaded as the result.
; NOTE(review): unlike the other 64-bit tests, no explicit align is given
; here; presumably that is why a sized libcall is expected - confirm.
define i64 @cmpxchg64(ptr %p) {
; CHECK-LABEL: cmpxchg64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: str r3, [sp, #12]
; CHECK-NEXT: str r3, [sp, #8]
; CHECK-NEXT: movs r1, #5
; CHECK-NEXT: str r1, [sp]
; CHECK-NEXT: str r1, [sp, #4]
; CHECK-NEXT: add r1, sp, #8
; CHECK-NEXT: movs r2, #1
; CHECK-NEXT: bl __atomic_compare_exchange_8
; CHECK-NEXT: ldr r1, [sp, #12]
; CHECK-NEXT: ldr r0, [sp, #8]
; CHECK-NEXT: add sp, #16
; CHECK-NEXT: pop {r7, pc}
%res = cmpxchg ptr %p, i64 0, i64 1 seq_cst seq_cst
%res.0 = extractvalue { i64, i1 } %res, 0
ret i64 %res.0
}

0 comments on commit b1b1086

Please sign in to comment.