Skip to content

[DAG] MatchLoadCombine - match swapped loads #167314

@RKSimon

Description

@RKSimon

Sort of an extension to the BSWAP pattern code:

uint64_t ll(uint32_t *p) { uint64_t h = *p++; h <<= 32; h |= *p++; return h; }
ll:
  movl (%rdi), %ecx
  movl 4(%rdi), %eax
  shlq $32, %rcx
  orq %rcx, %rax
  retq

These i32 loads are swapped (the load from the lower address ends up in the upper 32 bits), which prevents them from being combined into a single i64 load.

However, assuming the target can rotate cheaply, we could instead load the full i64 and then rotate it by 32 bits:

uint64_t rr(uint32_t *p) { uint64_t h = *p++; h |= (uint64_t)*p++ << 32; return __builtin_rotateright64(h, 32); }
rr:
  rorxq $32, (%rdi), %rax
  retq
define i64 @src(ptr %p) {
  %p.hi = getelementptr inbounds nuw i8, ptr %p, i64 4
  %lo = load i32, ptr %p
  %hi = load i32, ptr %p.hi
  %conv = zext i32 %lo to i64
  %shl = shl nuw i64 %conv, 32
  %conv2 = zext i32 %hi to i64
  %or = or disjoint i64 %shl, %conv2
  ret i64 %or
}

define i64 @tgt(ptr %p) {
  %p.hi = getelementptr inbounds nuw i8, ptr %p, i64 4
  %lo = load i32, ptr %p
  %hi = load i32, ptr %p.hi
  %conv = zext i32 %hi to i64
  %shl = shl nuw i64 %conv, 32
  %conv2 = zext i32 %lo to i64
  %or = or disjoint i64 %shl, %conv2
  %rot = tail call i64 @llvm.fshl.i64(i64 %or, i64 %or, i64 32)
  ret i64 %rot
}

declare i64 @llvm.fshl.i64(i64, i64, i64) #1

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions