[SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder #93575

dtcxzyw · 2024-05-28T16:35:53Z

As we already handle this pattern in InstCombine/DAGCombiner, I think it is ok to drop this folding in SelectionDAGBuilder to handle signed zero correctly.

Fixes #93414.

llvmbot · 2024-05-28T16:36:24Z

@llvm/pr-subscribers-backend-arm
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-backend-systemz
@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-selectiondag

Author: Yingwei Zheng (dtcxzyw)

Changes

As we already handle this pattern in InstCombine/DAGCombiner, I think it is ok to drop this folding in SelectionDAGBuilder to handle signed zero correctly.

Fixes #93414.

Full diff: https://github.com/llvm/llvm-project/pull/93575.diff

6 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+2-26)
(modified) llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll (+3-2)
(modified) llvm/test/CodeGen/AArch64/arm64-fmax.ll (+6-3)
(modified) llvm/test/CodeGen/AArch64/select_fmf.ll (+13-12)
(modified) llvm/test/CodeGen/AArch64/sve-pred-selectop.ll (+16-12)
(modified) llvm/test/CodeGen/RISCV/float-select-fcmp.ll (+26)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ca352da5d36eb..799f748fb786e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3725,32 +3725,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) { case SPF_UMAX: Opc = ISD::UMAX; break; case SPF_UMIN: Opc = ISD::UMIN; break; case SPF_SMAX: Opc = ISD::SMAX; break; - case SPF_SMIN: Opc = ISD::SMIN; break; - case SPF_FMINNUM: - switch (SPR.NaNBehavior) { - case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); - case SPNB_RETURNS_NAN: break; - case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; - case SPNB_RETURNS_ANY: - if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) || - (UseScalarMinMax && - TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()))) - Opc = ISD::FMINNUM; - break; - } - break; - case SPF_FMAXNUM: - switch (SPR.NaNBehavior) { - case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); - case SPNB_RETURNS_NAN: break; - case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; - case SPNB_RETURNS_ANY: - if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) || - (UseScalarMinMax && - TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()))) - Opc = ISD::FMAXNUM; - break; - } + case SPF_SMIN: + Opc = ISD::SMIN; break; case SPF_NABS: Negate = true; diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll index 550e89f4a27f9..b96c3ffbd52a8 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll @@ -23,7 +23,7 @@ define double @test_cross(float %in) { } ; Same as previous, but with ordered comparison; -; must become fminnm, not fmin. +; must become fcmp + fcsel, not fmin/fminnm. define double @test_cross_fail_nan(float %in) { ; CHECK-LABEL: test_cross_fail_nan: %cmp = fcmp olt float %in, 0.000000e+00 @@ -31,7 +31,8 @@ define double @test_cross_fail_nan(float %in) { %longer = fpext float %val to double ret double %longer -; CHECK: fminnm s +; CHECK: fcmp +; CHECK: fcsel } ; This isn't a min or a max, but passes the first condition for swapping the diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll index d7d54a6e48a92..b2fd4821cc6eb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll @@ -5,7 +5,8 @@ define double @test_direct(float %in) { ; CHECK-LABEL: test_direct: ; CHECK: // %bb.0: ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fmaxnm s0, s0, s1 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: fcsel s0, s1, s0, lt ; CHECK-NEXT: fcvt d0, s0 ; CHECK-NEXT: ret %cmp = fcmp nnan olt float %in, 0.000000e+00 @@ -18,7 +19,8 @@ define double @test_cross(float %in) { ; CHECK-LABEL: test_cross: ; CHECK: // %bb.0: ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: fcsel s0, s0, s1, lt ; CHECK-NEXT: fcvt d0, s0 ; CHECK-NEXT: ret %cmp = fcmp nnan ult float %in, 0.000000e+00 @@ -33,7 +35,8 @@ define double @test_cross_fail_nan(float %in) { ; CHECK-LABEL: test_cross_fail_nan: ; CHECK: // %bb.0: ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: fcsel s0, s0, s1, lt ; CHECK-NEXT: fcvt d0, s0 ; CHECK-NEXT: ret %cmp = fcmp nnan olt float %in, 0.000000e+00 diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll index 92d8676ca04be..938217180676f 100644 --- a/llvm/test/CodeGen/AArch64/select_fmf.ll +++ b/llvm/test/CodeGen/AArch64/select_fmf.ll @@ -7,13 +7,14 @@ define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: select_select_fold_select_and: ; CHECK: // %bb.0: -; CHECK-NEXT: fminnm s4, s1, s2 +; CHECK-NEXT: fcmp s0, s3 +; CHECK-NEXT: fcsel s4, s0, s3, gt ; CHECK-NEXT: fcmp s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s3 -; CHECK-NEXT: fmov s1, #0.50000000 -; CHECK-NEXT: fccmp s4, s0, #4, lt -; CHECK-NEXT: fadd s1, s0, s1 -; CHECK-NEXT: fcsel s2, s2, s0, gt +; CHECK-NEXT: fcsel s1, s1, s2, lt +; CHECK-NEXT: fmov s2, #0.50000000 +; CHECK-NEXT: fccmp s1, s0, #4, lt +; CHECK-NEXT: fadd s1, s0, s2 +; CHECK-NEXT: fcsel s2, s4, s0, gt ; CHECK-NEXT: fadd s4, s1, s2 ; CHECK-NEXT: fcmp s4, s1 ; CHECK-NEXT: b.le .LBB0_2 @@ -65,13 +66,13 @@ exit: ; preds = %if.end.i159.i.i, %if.then.i define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: select_select_fold_select_or: ; CHECK: // %bb.0: -; CHECK-NEXT: fminnm s4, s1, s2 ; CHECK-NEXT: fcmp s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s3 -; CHECK-NEXT: fmov s1, #0.50000000 -; CHECK-NEXT: fccmp s4, s0, #0, ge -; CHECK-NEXT: fadd s1, s0, s1 -; CHECK-NEXT: fcsel s2, s0, s2, gt +; CHECK-NEXT: fcsel s1, s1, s2, lt +; CHECK-NEXT: fccmp s0, s3, #0, ge +; CHECK-NEXT: fmov s2, #0.50000000 +; CHECK-NEXT: fccmp s1, s0, #0, le +; CHECK-NEXT: fadd s1, s0, s2 +; CHECK-NEXT: fcsel s2, s0, s3, gt ; CHECK-NEXT: fadd s4, s1, s2 ; CHECK-NEXT: fcmp s4, s1 ; CHECK-NEXT: b.le .LBB1_2 diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll index 8438e9d88f5de..6c3ff79f911bc 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll @@ -659,9 +659,10 @@ define <vscale x 4 x float> @fcmp_fast_olt_v4f32(<vscale x 4 x float> %z, <vscal ; CHECK-LABEL: fcmp_fast_olt_v4f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: mov z0.s, p1/m, z1.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z2.s, z1.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s +; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret entry: %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer @@ -675,9 +676,10 @@ define <vscale x 8 x half> @fcmp_fast_olt_v8f16(<vscale x 8 x half> %z, <vscale ; CHECK-LABEL: fcmp_fast_olt_v8f16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: mov z0.h, p1/m, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h +; CHECK-NEXT: mov z0.h, p0/m, z1.h ; CHECK-NEXT: ret entry: %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer @@ -691,9 +693,10 @@ define <vscale x 4 x float> @fcmp_fast_ogt_v4f32(<vscale x 4 x float> %z, <vscal ; CHECK-LABEL: fcmp_fast_ogt_v4f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: mov z0.s, p1/m, z1.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s +; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret entry: %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer @@ -707,9 +710,10 @@ define <vscale x 8 x half> @fcmp_fast_ogt_v8f16(<vscale x 8 x half> %z, <vscale ; CHECK-LABEL: fcmp_fast_ogt_v8f16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: mov z0.h, p1/m, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h +; CHECK-NEXT: mov z0.h, p0/m, z1.h ; CHECK-NEXT: ret entry: %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll index a2ff0d33e2d31..9e8ffb0104340 100644 --- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll @@ -451,3 +451,29 @@ define signext i32 @select_fcmp_uge_1_2(float %a, float %b) nounwind { %2 = select i1 %1, i32 1, i32 2 ret i32 %2 } + +; Test from PR93414 +; Make sure that we don't use fmin.s here to handle signed zero correctly. +define float @select_fcmp_olt_pos_zero(float %x) { +; CHECK-LABEL: select_fcmp_olt_pos_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.w.x fa5, zero +; CHECK-NEXT: flt.s a0, fa0, fa5 +; CHECK-NEXT: bnez a0, .LBB20_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.s fa0, fa5 +; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: ret +; +; CHECKZFINX-LABEL: select_fcmp_olt_pos_zero: +; CHECKZFINX: # %bb.0: +; CHECKZFINX-NEXT: flt.s a1, a0, zero +; CHECKZFINX-NEXT: bnez a1, .LBB20_2 +; CHECKZFINX-NEXT: # %bb.1: +; CHECKZFINX-NEXT: li a0, 0 +; CHECKZFINX-NEXT: .LBB20_2: +; CHECKZFINX-NEXT: ret + %cmp = fcmp olt float %x, 0.000000 + %sel = select i1 %cmp, float %x, float 0.000000 + ret float %sel +}

arsenm · 2024-05-29T10:39:08Z

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

- if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
- (UseScalarMinMax &&
- TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType())))
- Opc = ISD::FMINNUM;


I just noticed we don't actually have the DAG combine to form nnan select -> minimum/maximum. Ideally we would implement that before dropping this

I just noticed we don't actually have the DAG combine to form nnan select -> minimum/maximum. Ideally we would implement that before dropping this

Are you working on this?

Not yet, I was considering it

Are you going to work on this? If not I can probably pick this up

dtcxzyw · 2024-06-25T11:55:08Z

Failed Tests (19):
LLVM :: CodeGen/AMDGPU/fmed3.ll
LLVM :: CodeGen/AMDGPU/reduction.ll
LLVM :: CodeGen/ARM/fp16-vminmaxnm-safe.ll
LLVM :: CodeGen/ARM/fp16-vminmaxnm-vector.ll
LLVM :: CodeGen/ARM/fp16-vminmaxnm.ll
LLVM :: CodeGen/ARM/minnum-maxnum-intrinsics.ll
LLVM :: CodeGen/ARM/vminmaxnm-safe.ll
LLVM :: CodeGen/ARM/vminmaxnm.ll
LLVM :: CodeGen/PowerPC/vec-min-max.ll
LLVM :: CodeGen/PowerPC/vector-reduce-fmax.ll
LLVM :: CodeGen/PowerPC/vector-reduce-fmin.ll
LLVM :: CodeGen/SystemZ/vec-max-05.ll
LLVM :: CodeGen/SystemZ/vec-max-min-zerosplat.ll
LLVM :: CodeGen/SystemZ/vec-min-05.ll
LLVM :: CodeGen/Thumb2/mve-minmax.ll
LLVM :: CodeGen/Thumb2/mve-pred-selectop.ll
LLVM :: CodeGen/Thumb2/mve-pred-selectop2.ll
LLVM :: CodeGen/Thumb2/mve-pred-selectop3.ll
LLVM :: CodeGen/Thumb2/mve-vecreduce-fminmax.ll

dtcxzyw · 2024-06-25T11:57:52Z

@arsenm Have all (potential) regressions been fixed?

arsenm · 2024-06-25T13:26:15Z

@arsenm Have all (potential) regressions been fixed?

No. We at least should have the nnan cases to minimum/maximum combine handled

dtcxzyw · 2024-06-29T15:42:37Z

@arsenm Have all (potential) regressions been fixed?

No. We at least should have the nnan cases to minimum/maximum combine handled

@arsenm Ping me after most of the regressions are addressed :)

arsenm · 2024-07-02T14:00:15Z

No. We at least should have the nnan cases to minimum/maximum combine handled

@arsenm Ping me after most of the regressions are addressed :)

These are only theoretical regressions and I don't think should hold this up. It would only be helpful for targets that 1. only have legal fminimum/fmaximum and no legal fmin/fmax, and also those patterns appear during legalization. I believe wasm is the only relevant target for 1

dtcxzyw · 2024-07-02T16:37:35Z

@arsenm All assertion failures are fixed. Can I merge this PR?

arsenm · 2024-07-02T18:05:31Z

llvm/test/CodeGen/AMDGPU/fmed3.ll

 ; GFX9-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1
-; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
+; GFX9-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc


This regression actually doesn't look good. I need to look at this, but running instcombine on the test first doesn't recover the v_med3_f32

dtcxzyw added 2 commits May 28, 2024 23:45

[RISCV][SDAG] Add pre-commit tests for PR93414. NFC.

fadab92

[SDAG] Drop select -> fmax/min folding in SDAGBuilder

eeda142

dtcxzyw requested review from RKSimon, andykaylor, arsenm, jcranmer-intel and topperc May 28, 2024 16:35

llvmbot added backend:AArch64 llvm:SelectionDAG SelectionDAGISel as well labels May 28, 2024

arsenm reviewed May 29, 2024

View reviewed changes

quaternic mentioned this pull request Jun 17, 2024

Floating point comparisons are miscompiled for signalling NaN inputs on AArch64 rust-lang/rust#110174

Closed

arsenm approved these changes Jun 25, 2024

View reviewed changes

dtcxzyw added 2 commits June 29, 2024 22:48

Merge branch 'main' into fix-93414

e081184

[SDAG] Fix failed tests.

824540d

llvmbot added backend:ARM backend:AMDGPU backend:PowerPC backend:SystemZ labels Jun 29, 2024

arsenm reviewed Jul 2, 2024

View reviewed changes

dtcxzyw mentioned this pull request Dec 13, 2024

likely FP miscompile from AArch64 backend #119792

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder #93575

[SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder #93575

Uh oh!

dtcxzyw commented May 28, 2024

llvmbot commented May 28, 2024 •

edited

Loading

arsenm May 29, 2024

dtcxzyw Jun 5, 2024

arsenm Jun 6, 2024

arsenm Jun 25, 2024

dtcxzyw Jun 25, 2024

dtcxzyw commented Jun 25, 2024

dtcxzyw commented Jun 25, 2024

arsenm commented Jun 25, 2024

dtcxzyw commented Jun 29, 2024

arsenm commented Jul 2, 2024

dtcxzyw commented Jul 2, 2024

arsenm Jul 2, 2024

Labels

3 participants

[SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder #93575

Are you sure you want to change the base?

[SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder #93575

Uh oh!

Conversation

dtcxzyw commented May 28, 2024

llvmbot commented May 28, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

arsenm May 29, 2024

Choose a reason for hiding this comment

dtcxzyw Jun 5, 2024

Choose a reason for hiding this comment

arsenm Jun 6, 2024

Choose a reason for hiding this comment

arsenm Jun 25, 2024

Choose a reason for hiding this comment

dtcxzyw Jun 25, 2024

Choose a reason for hiding this comment

dtcxzyw commented Jun 25, 2024

dtcxzyw commented Jun 25, 2024

arsenm commented Jun 25, 2024

dtcxzyw commented Jun 29, 2024

arsenm commented Jul 2, 2024

dtcxzyw commented Jul 2, 2024

arsenm Jul 2, 2024

Choose a reason for hiding this comment

Labels

3 participants

llvmbot commented May 28, 2024 •

edited

Loading