Skip to content

Conversation

@RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Dec 1, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Dec 1, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Patch is 36.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170210.diff

1 File Affected:

  • (added) llvm/test/CodeGen/X86/combine-icmp.ll (+846)
diff --git a/llvm/test/CodeGen/X86/combine-icmp.ll b/llvm/test/CodeGen/X86/combine-icmp.ll new file mode 100644 index 0000000000000..ea1ab15f6e9ba --- /dev/null +++ b/llvm/test/CodeGen/X86/combine-icmp.ll @@ -0,0 +1,846 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512 + +define i4 @concat_icmp_v4i64_v2i64(<2 x i64> %a0, <2 x i64> %a1) { +; SSE2-LABEL: concat_icmp_v4i64_v2i64: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-NEXT: andps %xmm2, %xmm0 +; SSE2-NEXT: movmskps %xmm0, %eax +; SSE2-NEXT: xorl $15, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax +; SSE2-NEXT: retq +; +; SSE42-LABEL: concat_icmp_v4i64_v2i64: +; SSE42: # %bb.0: +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: pcmpeqq %xmm2, %xmm0 +; SSE42-NEXT: pcmpeqq %xmm2, %xmm1 +; SSE42-NEXT: packssdw %xmm1, %xmm0 +; SSE42-NEXT: movmskps %xmm0, %eax +; SSE42-NEXT: xorl $15, %eax +; SSE42-NEXT: # kill: def $al killed $al killed $eax +; SSE42-NEXT: retq +; +; AVX1OR2-LABEL: concat_icmp_v4i64_v2i64: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1OR2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; 
AVX1OR2-NEXT: vmovmskps %xmm0, %eax +; AVX1OR2-NEXT: xorl $15, %eax +; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_icmp_v4i64_v2i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vptestmq %xmm0, %xmm0, %k0 +; AVX512-NEXT: vptestmq %xmm1, %xmm1, %k1 +; AVX512-NEXT: kshiftlb $2, %k1, %k1 +; AVX512-NEXT: korw %k1, %k0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq + %v0 = icmp ne <2 x i64> %a0, zeroinitializer + %v1 = icmp ne <2 x i64> %a1, zeroinitializer + %v = shufflevector <2 x i1> %v0, <2 x i1> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %r = bitcast <4 x i1> %v to i4 + ret i4 %r +} + +define i8 @concat_icmp_v8i32_v4i32(<4 x i32> %a0, <4 x i32> %a1) { +; SSE-LABEL: concat_icmp_v8i32_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_icmp_v8i32_v4i32: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1OR2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_icmp_v8i32_v4i32: +; AVX512: # %bb.0: +; AVX512-NEXT: vptestnmd %xmm0, %xmm0, %k0 +; AVX512-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; AVX512-NEXT: kshiftlb $4, %k1, %k1 +; AVX512-NEXT: korb %k1, %k0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq + %v0 = icmp eq <4 x i32> %a0, zeroinitializer + %v1 = icmp eq <4 x i32> %a1, zeroinitializer + %v = shufflevector <4 x i1> %v0, <4 x i1> %v1, <8 
x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %r = bitcast <8 x i1> %v to i8 + ret i8 %r +} + +define i16 @concat_icmp_v16i16_v8i16(<8 x i16> %a0, <8 x i16> %a1) { +; SSE2-LABEL: concat_icmp_v16i16_v8i16: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: psubusw %xmm0, %xmm3 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: pcmpeqw %xmm0, %xmm3 +; SSE2-NEXT: psubusw %xmm1, %xmm2 +; SSE2-NEXT: pcmpeqw %xmm0, %xmm2 +; SSE2-NEXT: packsswb %xmm2, %xmm3 +; SSE2-NEXT: pmovmskb %xmm3, %eax +; SSE2-NEXT: # kill: def $ax killed $ax killed $eax +; SSE2-NEXT: retq +; +; SSE42-LABEL: concat_icmp_v16i16_v8i16: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2] +; SSE42-NEXT: movdqa %xmm0, %xmm3 +; SSE42-NEXT: pmaxuw %xmm2, %xmm3 +; SSE42-NEXT: pcmpeqw %xmm0, %xmm3 +; SSE42-NEXT: pmaxuw %xmm1, %xmm2 +; SSE42-NEXT: pcmpeqw %xmm1, %xmm2 +; SSE42-NEXT: packsswb %xmm2, %xmm3 +; SSE42-NEXT: pmovmskb %xmm3, %eax +; SSE42-NEXT: # kill: def $ax killed $ax killed $eax +; SSE42-NEXT: retq +; +; AVX1-LABEL: concat_icmp_v16i16_v8i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2] +; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm2 +; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: # kill: def $ax killed $ax killed $eax +; AVX1-NEXT: retq +; +; AVX2-LABEL: concat_icmp_v16i16_v8i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2] +; AVX2-NEXT: vpmaxuw %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpmaxuw %xmm2, %xmm1, %xmm2 +; AVX2-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: # kill: def $ax killed $ax killed $eax +; AVX2-NEXT: retq +; +; 
AVX512-LABEL: concat_icmp_v16i16_v8i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1] +; AVX512-NEXT: vpcmpnleuw %xmm2, %xmm0, %k0 +; AVX512-NEXT: vpcmpnleuw %xmm2, %xmm1, %k1 +; AVX512-NEXT: kunpckbw %k0, %k1, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq + %v0 = icmp ugt <8 x i16> %a0, splat (i16 1) + %v1 = icmp ugt <8 x i16> %a1, splat (i16 1) + %v = shufflevector <8 x i1> %v0, <8 x i1> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %r = bitcast <16 x i1> %v to i16 + ret i16 %r +} + +define i32 @concat_icmp_v32i8_v16i8(<16 x i8> %a0, <16 x i8> %a1) { +; SSE-LABEL: concat_icmp_v32i8_v16i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; SSE-NEXT: pcmpgtb %xmm2, %xmm0 +; SSE-NEXT: pcmpgtb %xmm2, %xmm1 +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: pmovmskb %xmm1, %eax +; SSE-NEXT: shll $16, %eax +; SSE-NEXT: orl %ecx, %eax +; SSE-NEXT: retq +; +; AVX1-LABEL: concat_icmp_v32i8_v16i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: vpmovmskb %xmm1, %eax +; AVX1-NEXT: shll $16, %eax +; AVX1-NEXT: orl %ecx, %eax +; AVX1-NEXT: retq +; +; AVX2-LABEL: concat_icmp_v32i8_v16i8: +; AVX2: # %bb.0: +; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmovmskb %ymm0, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: concat_icmp_v32i8_v16i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512-NEXT: vpcmpgtb %xmm2, %xmm0, %k0 +; AVX512-NEXT: vpcmpgtb %xmm2, 
%xmm1, %k1 +; AVX512-NEXT: kunpckwd %k0, %k1, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: retq + %v0 = icmp sgt <16 x i8> %a0, splat (i8 5) + %v1 = icmp sgt <16 x i8> %a1, splat (i8 5) + %v = shufflevector <16 x i1> %v0, <16 x i1> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> + %r = bitcast <32 x i1> %v to i32 + ret i32 %r +} + +define i8 @concat_icmp_v8i64_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> %a3) { +; SSE2-LABEL: concat_icmp_v8i64_v2i64: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,2,2,3] +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483776,2147483776,2147483776,2147483648] +; SSE2-NEXT: movdqa %xmm5, %xmm7 +; SSE2-NEXT: pcmpgtd %xmm6, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,3,3] +; SSE2-NEXT: pcmpeqd %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm7, %xmm3 +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,2,2,3] +; SSE2-NEXT: movdqa %xmm5, %xmm7 +; SSE2-NEXT: pcmpgtd %xmm6, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,3,3] +; SSE2-NEXT: pcmpeqd %xmm4, %xmm2 +; SSE2-NEXT: pand %xmm7, %xmm2 +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,2,2,3] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,3,3] +; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE2-NEXT: pand %xmm6, %xmm1 +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} 
xmm3 = xmm0[0,2,2,3] +; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,3,3] +; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: packsswb %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax +; SSE2-NEXT: retq +; +; SSE42-LABEL: concat_icmp_v8i64_v2i64: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; SSE42-NEXT: pxor %xmm4, %xmm0 +; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775936,9223372036854775936] +; SSE42-NEXT: movdqa %xmm5, %xmm6 +; SSE42-NEXT: pcmpgtq %xmm0, %xmm6 +; SSE42-NEXT: pxor %xmm4, %xmm1 +; SSE42-NEXT: movdqa %xmm5, %xmm0 +; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 +; SSE42-NEXT: packssdw %xmm0, %xmm6 +; SSE42-NEXT: pxor %xmm4, %xmm2 +; SSE42-NEXT: movdqa %xmm5, %xmm0 +; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 +; SSE42-NEXT: pxor %xmm4, %xmm3 +; SSE42-NEXT: pcmpgtq %xmm3, %xmm5 +; SSE42-NEXT: packssdw %xmm5, %xmm0 +; SSE42-NEXT: packssdw %xmm6, %xmm6 +; SSE42-NEXT: packssdw %xmm0, %xmm0 +; SSE42-NEXT: packsswb %xmm0, %xmm6 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,3,2,3] +; SSE42-NEXT: pmovmskb %xmm0, %eax +; SSE42-NEXT: # kill: def $al killed $al killed $eax +; SSE42-NEXT: retq +; +; AVX1-LABEL: concat_icmp_v8i64_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: # xmm4 = mem[0,0] +; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [9223372036854775936,9223372036854775936] +; AVX1-NEXT: # xmm5 = mem[0,0] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm5, %xmm0 +; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm1 +; 
AVX1-NEXT: vpcmpgtq %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm5, %xmm2 +; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpackssdw %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,3] +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: # kill: def $al killed $al killed $eax +; AVX1-NEXT: retq +; +; AVX2-LABEL: concat_icmp_v8i64_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm4, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm5 = [9223372036854775936,9223372036854775936] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm5, %xmm0 +; AVX2-NEXT: vpxor %xmm4, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm5, %xmm1 +; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm4, %xmm2, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm5, %xmm1 +; AVX2-NEXT: vpxor %xmm4, %xmm3, %xmm2 +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm5, %xmm2 +; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpackssdw %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,3] +; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: retq +; +; AVX512-LABEL: concat_icmp_v8i64_v2i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm4 = [128,128] +; AVX512-NEXT: vpcmpltuq %xmm4, %xmm0, %k0 +; AVX512-NEXT: vpcmpltuq %xmm4, %xmm1, %k1 +; AVX512-NEXT: vpcmpltuq %xmm4, %xmm2, %k2 +; AVX512-NEXT: vpcmpltuq %xmm4, %xmm3, %k3 +; AVX512-NEXT: kshiftlb $2, %k3, %k3 +; AVX512-NEXT: korb %k3, %k2, %k2 +; AVX512-NEXT: kshiftlb $4, %k2, %k2 +; AVX512-NEXT: kshiftlb $2, %k1, %k1 +; AVX512-NEXT: korw %k1, %k0, %k0 +; AVX512-NEXT: kshiftlb $4, %k0, %k0 +; AVX512-NEXT: kshiftrb $4, %k0, %k0 +; AVX512-NEXT: korb %k2, %k0, %k0 +; 
AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq + %v0 = icmp ult <2 x i64> %a0, splat (i64 128) + %v1 = icmp ult <2 x i64> %a1, splat (i64 128) + %v2 = icmp ult <2 x i64> %a2, splat (i64 128) + %v3 = icmp ult <2 x i64> %a3, splat (i64 128) + %v01 = shufflevector <2 x i1> %v0, <2 x i1> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %v23 = shufflevector <2 x i1> %v2, <2 x i1> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %v = shufflevector <4 x i1> %v01, <4 x i1> %v23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %r = bitcast <8 x i1> %v to i8 + ret i8 %r +} + +define i16 @concat_icmp_v16i32_v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) { +; SSE-LABEL: concat_icmp_v16i32_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm4, %xmm4 +; SSE-NEXT: pcmpgtd %xmm4, %xmm0 +; SSE-NEXT: pcmpgtd %xmm4, %xmm1 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: pcmpgtd %xmm4, %xmm2 +; SSE-NEXT: pcmpgtd %xmm4, %xmm3 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: concat_icmp_v16i32_v4i32: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1OR2-NEXT: vpcmpgtd %xmm4, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpcmpgtd %xmm4, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpcmpgtd %xmm4, %xmm2, %xmm1 +; AVX1OR2-NEXT: vpcmpgtd %xmm4, %xmm3, %xmm2 +; AVX1OR2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: # kill: def $ax killed $ax killed $eax +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: concat_icmp_v16i32_v4i32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX512-NEXT: vpcmpgtd %xmm4, %xmm0, %k0 +; AVX512-NEXT: vpcmpgtd %xmm4, %xmm1, %k1 +; AVX512-NEXT: vpcmpgtd %xmm4, %xmm2, %k2 +; AVX512-NEXT: vpcmpgtd %xmm4, 
%xmm3, %k3 +; AVX512-NEXT: kshiftlb $4, %k1, %k1 +; AVX512-NEXT: korb %k1, %k0, %k0 +; AVX512-NEXT: kshiftlb $4, %k3, %k1 +; AVX512-NEXT: korb %k1, %k2, %k1 +; AVX512-NEXT: kunpckbw %k0, %k1, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq + %v0 = icmp sgt <4 x i32> %a0, zeroinitializer + %v1 = icmp sgt <4 x i32> %a1, zeroinitializer + %v2 = icmp sgt <4 x i32> %a2, zeroinitializer + %v3 = icmp sgt <4 x i32> %a3, zeroinitializer + %v01 = shufflevector <4 x i1> %v0, <4 x i1> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %v23 = shufflevector <4 x i1> %v2, <4 x i1> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %v = shufflevector <8 x i1> %v01, <8 x i1> %v23, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %r = bitcast <16 x i1> %v to i16 + ret i16 %r +} + +define i32 @concat_icmp_v32i16_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) { +; SSE-LABEL: concat_icmp_v32i16_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm4, %xmm4 +; SSE-NEXT: pcmpeqw %xmm4, %xmm0 +; SSE-NEXT: pcmpeqw %xmm4, %xmm1 +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pcmpeqw %xmm4, %xmm2 +; SSE-NEXT: pcmpeqw %xmm4, %xmm3 +; SSE-NEXT: packsswb %xmm3, %xmm2 +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: xorl $65535, %ecx # imm = 0xFFFF +; SSE-NEXT: pmovmskb %xmm2, %eax +; SSE-NEXT: notl %eax +; SSE-NEXT: shll $16, %eax +; SSE-NEXT: orl %ecx, %eax +; SSE-NEXT: retq +; +; AVX1-LABEL: concat_icmp_v32i16_v8i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm2 +; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: xorl $65535, 
%ecx # imm = 0xFFFF +; AVX1-NEXT: vpmovmskb %xmm1, %eax +; AVX1-NEXT: notl %eax +; AVX1-NEXT: shll $16, %eax +; AVX1-NEXT: orl %ecx, %eax +; AVX1-NEXT: retq +; +; AVX2-LABEL: concat_icmp_v32i16_v8i16: +; AVX2: # %bb.0: +; AVX2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 +; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpacksswb %ymm2, %ymm2, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 +; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm2 +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpacksswb %ymm0, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm0 +; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vpmovmskb %ymm0, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: concat_icmp_v32i16_v8i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vptestmw %xmm0, %xmm0, %k0 +; AVX512-NEXT: vptestmw %xmm1, %xmm1, %k1 +; AVX512-NEXT: vptestmw %xmm2, %xmm2, %k2 +; AVX512-NEXT: vptestmw %xmm3, %xmm3, %k3 +; AVX512-NEXT: kunpckbw %k0, %k1, %k0 +; AVX512-NEXT: kunpckbw %k2, %k3, %k1 +; AVX512-NEXT: ... [truncated] 
@RKSimon RKSimon enabled auto-merge (squash) December 1, 2025 22:22
@RKSimon RKSimon merged commit 9324dae into llvm:main Dec 1, 2025
11 of 12 checks passed
@RKSimon RKSimon deleted the x86-concat-icmp-tests branch December 2, 2025 09:59
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

2 participants