Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Missed optimization #67342

Open
ojeda opened this issue Sep 25, 2023 · 1 comment · May be fixed by #69021
Open

Missed optimization #67342

ojeda opened this issue Sep 25, 2023 · 1 comment · May be fixed by #69021
Assignees
Labels
llvm Umbrella label for LLVM issues missed-optimization

Comments

@ojeda
Copy link

ojeda commented Sep 25, 2023

From: rust-lang/rust#116150

With LLVM 17.0.1:

; Three views of the same stack slot used by @f below.
; Whole value: an i16 tag followed by three more i16 slots.
%"core::result::Result<u16, i32>" = type { i16, [3 x i16] }
; Ok view: one leading i16 slot (where the tag lives), then the i16 payload.
%"core::result::Result<u16, i32>::Ok" = type { [1 x i16], i16 }
; Err view: one leading i32 slot (covering the tag), then the i32 payload.
%"core::result::Result<u16, i32>::Err" = type { [1 x i32], i32 }

; Reproducer, variant 1: the `trunc` producing the Ok payload sits in bb2,
; right next to the store that consumes it.
; Fills a stack slot as either Ok or Err depending on the sign of %err, then
; returns the whole slot as one i64.
define i64 @f(i32 %err) unnamed_addr #0 {
  %r = alloca %"core::result::Result<u16, i32>", align 4
  ; A negative %err takes the Err path (bb1); anything else takes the Ok path (bb2).
  %_3 = icmp slt i32 %err, 0
  br i1 %_3, label %bb1, label %bb2

bb2:                                              ; preds = %start (the entry block, which carries no label in this listing)
  ; Ok path: keep only the low 16 bits of %err as the payload.
  %ok = trunc i32 %err to i16
  ; Address of field 1 of the Ok view, i.e. the i16 right after the tag slot.
  %1 = getelementptr inbounds %"core::result::Result<u16, i32>::Ok", ptr %r, i32 0, i32 1
  store i16 %ok, ptr %1, align 2
  ; Tag 0 at the start of the slot. The second half of the slot is not written on this path.
  store i16 0, ptr %r, align 4
  br label %bb3

bb1:                                              ; preds = %start (the entry block, which carries no label in this listing)
  ; Err path: address of field 1 of the Err view, i.e. the i32 after the leading i32 slot.
  %2 = getelementptr inbounds %"core::result::Result<u16, i32>::Err", ptr %r, i32 0, i32 1
  store i32 %err, ptr %2, align 4
  ; Tag 1 at the start of the slot. The i16 between the tag and the payload is
  ; not written on this path.
  store i16 1, ptr %r, align 4
  br label %bb3

bb3:                                              ; preds = %bb1, %bb2
  ; Read the slot back as a single i64, including whatever the taken path left unwritten.
  %3 = load i64, ptr %r, align 4
  ret i64 %3
}

optimizes to:

; Optimized form of variant 1: the stack slot is gone and the i64 is assembled
; from shifts and ors, but a select keyed on the sign test is still present.
define i64 @f(i32 %err) unnamed_addr #0 {
  %_3 = icmp slt i32 %err, 0
  ; Sign bit of %err moved to bit 0; used as the tag (1 when negative, else 0).
  %err.lobit = lshr i32 %err, 31
  ; %err zero-extended and placed in bits 32..63 of the result.
  %r.sroa.4.0.insert.ext = zext i32 %err to i64
  %r.sroa.4.0.insert.shift = shl nuw i64 %r.sroa.4.0.insert.ext, 32
  ; Low 16 bits of %err moved to bits 16..31.
  %0 = shl i32 %err, 16
  ; The select this issue is about: bits 16..31 are forced to 0 when %err is
  ; negative, and carry the shifted value otherwise.
  %1 = select i1 %_3, i32 0, i32 %0
  %r.sroa.3.0.insert.shift = zext i32 %1 to i64
  ; Merge the three pieces: bits 32..63, bits 16..31, and the tag bit.
  %r.sroa.3.0.insert.insert = or i64 %r.sroa.4.0.insert.shift, %r.sroa.3.0.insert.shift
  %r.sroa.0.0.insert.ext = zext i32 %err.lobit to i64
  %r.sroa.0.0.insert.insert = or i64 %r.sroa.3.0.insert.insert, %r.sroa.0.0.insert.ext
  ret i64 %r.sroa.0.0.insert.insert
}
# x86-64 (Intel syntax) code generated for variant 1.
# NOTE(review): register roles below assume the System V AMD64 convention
# (argument in edi, result in rax) -- the target is not shown in the listing.
f:                                      # @f
        # eax = sign bit of the argument (the tag); the 32-bit write clears the top of rax
        mov     eax, edi
        shr     eax, 31
        # rcx = argument moved into bits 32..63
        mov     rcx, rdi
        shl     rcx, 32
        # edx = argument << 16 (candidate for bits 16..31)
        mov     edx, edi
        shl     edx, 16
        # esi = 0, replaced by edx only when the argument is non-negative;
        # this test/cmovns pair is the lowered select
        xor     esi, esi
        test    edi, edi
        cmovns  esi, edx
        # combine the three pieces into rax
        or      rsi, rcx
        or      rax, rsi
        ret

But if the trunc is moved away from the store (hoisted into the entry block, ahead of the branch):

; Same three type views as in the first reproducer.
; Whole value: an i16 tag followed by three more i16 slots.
%"core::result::Result<u16, i32>" = type { i16, [3 x i16] }
; Ok view: one leading i16 slot (where the tag lives), then the i16 payload.
%"core::result::Result<u16, i32>::Ok" = type { [1 x i16], i16 }
; Err view: one leading i32 slot (covering the tag), then the i32 payload.
%"core::result::Result<u16, i32>::Err" = type { [1 x i32], i32 }

; Reproducer, variant 2: the `trunc` is computed in the entry block, before
; the branch, instead of in bb2 beside the store that uses it. The stores and
; the final load are otherwise the same as in variant 1.
define i64 @f(i32 %err) unnamed_addr #0 {
  %r = alloca %"core::result::Result<u16, i32>", align 4
  ; Ok payload (low 16 bits of %err), computed on both paths but only stored in bb2.
  %ok = trunc i32 %err to i16
  ; A negative %err takes the Err path (bb1); anything else takes the Ok path (bb2).
  %_3 = icmp slt i32 %err, 0
  br i1 %_3, label %bb1, label %bb2

bb2:                                              ; preds = %start (the entry block, which carries no label in this listing)
  ; Ok path: address of field 1 of the Ok view, i.e. the i16 right after the tag slot.
  %1 = getelementptr inbounds %"core::result::Result<u16, i32>::Ok", ptr %r, i32 0, i32 1
  store i16 %ok, ptr %1, align 2
  ; Tag 0 at the start of the slot. The second half of the slot is not written on this path.
  store i16 0, ptr %r, align 4
  br label %bb3

bb1:                                              ; preds = %start (the entry block, which carries no label in this listing)
  ; Err path: address of field 1 of the Err view, i.e. the i32 after the leading i32 slot.
  %2 = getelementptr inbounds %"core::result::Result<u16, i32>::Err", ptr %r, i32 0, i32 1
  store i32 %err, ptr %2, align 4
  ; Tag 1 at the start of the slot. The i16 between the tag and the payload is
  ; not written on this path.
  store i16 1, ptr %r, align 4
  br label %bb3

bb3:                                              ; preds = %bb1, %bb2
  ; Read the slot back as a single i64, including whatever the taken path left unwritten.
  %3 = load i64, ptr %r, align 4
  ret i64 %3
}

the optimized output ends up without a select (and the generated code drops the test/cmov):

; Optimized form of variant 2: same shift/or assembly as variant 1, but with
; no sign compare and no select -- bits 16..31 always carry %err << 16.
define i64 @f(i32 %err) unnamed_addr #0 {
  ; Sign bit of %err moved to bit 0; used as the tag (1 when negative, else 0).
  %err.lobit = lshr i32 %err, 31
  ; %err zero-extended and placed in bits 32..63 of the result.
  %r.sroa.4.0.insert.ext = zext i32 %err to i64
  %r.sroa.4.0.insert.shift = shl nuw i64 %r.sroa.4.0.insert.ext, 32
  ; Low 16 bits of %err moved to bits 16..31, used unconditionally.
  %0 = shl i32 %err, 16
  %r.sroa.3.0.insert.shift = zext i32 %0 to i64
  ; Merge the three pieces: bits 32..63, bits 16..31, and the tag bit.
  %r.sroa.3.0.insert.insert = or i64 %r.sroa.4.0.insert.shift, %r.sroa.3.0.insert.shift
  %r.sroa.0.0.insert.ext = zext i32 %err.lobit to i64
  %r.sroa.0.0.insert.insert = or i64 %r.sroa.3.0.insert.insert, %r.sroa.0.0.insert.ext
  ret i64 %r.sroa.0.0.insert.insert
}
# x86-64 (Intel syntax) code generated for variant 2: no test/cmov sequence.
# NOTE(review): register roles below assume the System V AMD64 convention
# (argument in edi, result in rax) -- the target is not shown in the listing.
f:                                      # @f
        # eax = sign bit of the argument (the tag); the 32-bit write clears the top of rax
        mov     eax, edi
        shr     eax, 31
        # rcx = argument moved into bits 32..63
        mov     rcx, rdi
        shl     rcx, 32
        # edi = argument << 16, used unconditionally for bits 16..31
        shl     edi, 16
        # combine the three pieces into rax
        or      rcx, rdi
        or      rax, rcx
        ret
@Endilll Endilll added missed-optimization llvm Umbrella label for LLVM issues and removed new issue labels Sep 25, 2023
@dtcxzyw dtcxzyw self-assigned this Sep 26, 2023
@dtcxzyw
Copy link
Member

dtcxzyw commented Sep 26, 2023

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
llvm Umbrella label for LLVM issues missed-optimization
Projects
None yet
Development

Successfully merging a pull request may close this issue.

3 participants