Skip to content

Missed optimization #67342

@ojeda

Description

@ojeda

From: rust-lang/rust#116150

With LLVM 17.0.1:

; Reproducer, variant 1: the `trunc` lives in bb2, next to the store that uses it.
;
; Three struct views of the same alloca: the whole value, and one view per
; variant whose element 1 is that variant's payload.
%"core::result::Result<u16, i32>" = type { i16, [3 x i16] }
%"core::result::Result<u16, i32>::Ok" = type { [1 x i16], i16 }
%"core::result::Result<u16, i32>::Err" = type { [1 x i32], i32 }

; Builds the value in a stack slot and returns the slot reinterpreted as i64.
; Negative %err takes the bb1 path (tag 1), anything else takes bb2 (tag 0).
; NOTE(review): the `; preds = %start` comments refer to an entry-block label
; that is not present in this snippet; the entry block here is unlabeled.
define i64 @f(i32 %err) unnamed_addr #0 {
  %r = alloca %"core::result::Result<u16, i32>", align 4
  %_3 = icmp slt i32 %err, 0
  br i1 %_3, label %bb1, label %bb2

bb2: ; preds = %start
  ; Non-negative path: truncate to 16 bits and store it through the Ok view
  ; (element 1), then store tag 0 at the start of the slot.
  ; This path never writes the upper half of the slot that bb1 fills.
  %ok = trunc i32 %err to i16
  %1 = getelementptr inbounds %"core::result::Result<u16, i32>::Ok", ptr %r, i32 0, i32 1
  store i16 %ok, ptr %1, align 2
  store i16 0, ptr %r, align 4
  br label %bb3

bb1: ; preds = %start
  ; Negative path: store the full i32 through the Err view (element 1), then
  ; store tag 1 at the start of the slot.
  ; This path never writes the i16 that bb2 stores through the Ok view, so
  ; those bits of the loaded i64 are not determined by any store here.
  %2 = getelementptr inbounds %"core::result::Result<u16, i32>::Err", ptr %r, i32 0, i32 1
  store i32 %err, ptr %2, align 4
  store i16 1, ptr %r, align 4
  br label %bb3

bb3: ; preds = %bb1, %bb2
  ; Read the whole slot back as a single i64.
  %3 = load i64, ptr %r, align 4
  ret i64 %3
}

optimizes to:

; Optimized output for variant 1 (trunc inside bb2): the alloca and the
; branches are gone, but a `select` on the sign of %err remains.
define i64 @f(i32 %err) unnamed_addr #0 {
  %_3 = icmp slt i32 %err, 0
  ; Sign bit of %err, i.e. 1 when %err is negative and 0 otherwise.
  %err.lobit = lshr i32 %err, 31
  ; %err placed in bits 32..63 of the result.
  %r.sroa.4.0.insert.ext = zext i32 %err to i64
  %r.sroa.4.0.insert.shift = shl nuw i64 %r.sroa.4.0.insert.ext, 32
  ; Low 16 bits of %err moved to bits 16..31 ...
  %0 = shl i32 %err, 16
  ; ... but forced to 0 when %err is negative. This is the select that the
  ; other variant does not have.
  %1 = select i1 %_3, i32 0, i32 %0
  %r.sroa.3.0.insert.shift = zext i32 %1 to i64
  ; OR the three pieces together into the returned i64.
  %r.sroa.3.0.insert.insert = or i64 %r.sroa.4.0.insert.shift, %r.sroa.3.0.insert.shift
  %r.sroa.0.0.insert.ext = zext i32 %err.lobit to i64
  %r.sroa.0.0.insert.insert = or i64 %r.sroa.3.0.insert.insert, %r.sroa.0.0.insert.ext
  ret i64 %r.sroa.0.0.insert.insert
}
# Assembly for variant 1 (Intel syntax); includes a conditional move.
# NOTE(review): presumably edi/rdi carries %err and rax the result — confirm target ABI.
f: # @f
  mov eax, edi
  shr eax, 31          # eax = sign bit of the input
  mov rcx, rdi
  shl rcx, 32          # rcx = input shifted into the upper 32 bits
  mov edx, edi
  shl edx, 16          # edx = input << 16 (32-bit)
  xor esi, esi         # esi = 0, the value kept when the input is negative
  test edi, edi        # set flags from the input
  cmovns esi, edx      # if the sign flag is clear, esi = edx
  or rsi, rcx
  or rax, rsi          # combine the three pieces
  ret

But if the `trunc` is hoisted into the entry block, away from the `store` that uses it:

; Reproducer, variant 2: the `trunc` sits in the entry block, before the
; branch, instead of in bb2. The rest of the function is unchanged.
;
; Three struct views of the same alloca: the whole value, and one view per
; variant whose element 1 is that variant's payload.
%"core::result::Result<u16, i32>" = type { i16, [3 x i16] }
%"core::result::Result<u16, i32>::Ok" = type { [1 x i16], i16 }
%"core::result::Result<u16, i32>::Err" = type { [1 x i32], i32 }

; Builds the value in a stack slot and returns the slot reinterpreted as i64.
; Negative %err takes the bb1 path (tag 1), anything else takes bb2 (tag 0).
; NOTE(review): the `; preds = %start` comments refer to an entry-block label
; that is not present in this snippet; the entry block here is unlabeled.
define i64 @f(i32 %err) unnamed_addr #0 {
  %r = alloca %"core::result::Result<u16, i32>", align 4
  ; Computed unconditionally here; only bb2 uses it.
  %ok = trunc i32 %err to i16
  %_3 = icmp slt i32 %err, 0
  br i1 %_3, label %bb1, label %bb2

bb2: ; preds = %start
  ; Non-negative path: store the truncated value through the Ok view
  ; (element 1), then store tag 0 at the start of the slot.
  %1 = getelementptr inbounds %"core::result::Result<u16, i32>::Ok", ptr %r, i32 0, i32 1
  store i16 %ok, ptr %1, align 2
  store i16 0, ptr %r, align 4
  br label %bb3

bb1: ; preds = %start
  ; Negative path: store the full i32 through the Err view (element 1), then
  ; store tag 1 at the start of the slot.
  ; This path never writes the i16 that bb2 stores through the Ok view.
  %2 = getelementptr inbounds %"core::result::Result<u16, i32>::Err", ptr %r, i32 0, i32 1
  store i32 %err, ptr %2, align 4
  store i16 1, ptr %r, align 4
  br label %bb3

bb3: ; preds = %bb1, %bb2
  ; Read the whole slot back as a single i64.
  %3 = load i64, ptr %r, align 4
  ret i64 %3
}

it ends up without a `select`, and the generated assembly is shorter (no `xor`/`test`/`cmovns`):

; Optimized output for variant 2 (trunc in the entry block): no `icmp` and no
; `select`; bits 16..31 are filled from %err unconditionally.
define i64 @f(i32 %err) unnamed_addr #0 {
  ; Sign bit of %err, i.e. 1 when %err is negative and 0 otherwise.
  %err.lobit = lshr i32 %err, 31
  ; %err placed in bits 32..63 of the result.
  %r.sroa.4.0.insert.ext = zext i32 %err to i64
  %r.sroa.4.0.insert.shift = shl nuw i64 %r.sroa.4.0.insert.ext, 32
  ; Low 16 bits of %err moved to bits 16..31, regardless of sign.
  %0 = shl i32 %err, 16
  %r.sroa.3.0.insert.shift = zext i32 %0 to i64
  ; OR the three pieces together into the returned i64.
  %r.sroa.3.0.insert.insert = or i64 %r.sroa.4.0.insert.shift, %r.sroa.3.0.insert.shift
  %r.sroa.0.0.insert.ext = zext i32 %err.lobit to i64
  %r.sroa.0.0.insert.insert = or i64 %r.sroa.3.0.insert.insert, %r.sroa.0.0.insert.ext
  ret i64 %r.sroa.0.0.insert.insert
}
# Assembly for variant 2 (Intel syntax); straight-line shifts and ORs, no conditional move.
# NOTE(review): presumably edi/rdi carries %err and rax the result — confirm target ABI.
f: # @f
  mov eax, edi
  shr eax, 31          # eax = sign bit of the input
  mov rcx, rdi
  shl rcx, 32          # rcx = input shifted into the upper 32 bits
  shl edi, 16          # edi = input << 16 (32-bit), used unconditionally
  or rcx, rdi
  or rax, rcx          # combine the three pieces
  ret

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions