- Notifications
You must be signed in to change notification settings - Fork 15.3k
[SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder #93575
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
| @llvm/pr-subscribers-backend-arm @llvm/pr-subscribers-llvm-selectiondag Author: Yingwei Zheng (dtcxzyw) ChangesAs we already handle this pattern in InstCombine/DAGCombiner, I think it is ok to drop this folding in SelectionDAGBuilder to handle signed zero correctly. Fixes #93414. Full diff: https://github.com/llvm/llvm-project/pull/93575.diff 6 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ca352da5d36eb..799f748fb786e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3725,32 +3725,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) { case SPF_UMAX: Opc = ISD::UMAX; break; case SPF_UMIN: Opc = ISD::UMIN; break; case SPF_SMAX: Opc = ISD::SMAX; break; - case SPF_SMIN: Opc = ISD::SMIN; break; - case SPF_FMINNUM: - switch (SPR.NaNBehavior) { - case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); - case SPNB_RETURNS_NAN: break; - case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; - case SPNB_RETURNS_ANY: - if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) || - (UseScalarMinMax && - TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()))) - Opc = ISD::FMINNUM; - break; - } - break; - case SPF_FMAXNUM: - switch (SPR.NaNBehavior) { - case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); - case SPNB_RETURNS_NAN: break; - case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; - case SPNB_RETURNS_ANY: - if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) || - (UseScalarMinMax && - TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()))) - Opc = ISD::FMAXNUM; - break; - } + case SPF_SMIN: + Opc = ISD::SMIN; break; case SPF_NABS: Negate = true; diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll index 550e89f4a27f9..b96c3ffbd52a8 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll @@ -23,7 +23,7 @@ define double @test_cross(float %in) { } ; Same as previous, but with ordered comparison; -; must become fminnm, not fmin. +; must become fcmp + fcsel, not fmin/fminnm. define double @test_cross_fail_nan(float %in) { ; CHECK-LABEL: test_cross_fail_nan: %cmp = fcmp olt float %in, 0.000000e+00 @@ -31,7 +31,8 @@ define double @test_cross_fail_nan(float %in) { %longer = fpext float %val to double ret double %longer -; CHECK: fminnm s +; CHECK: fcmp +; CHECK: fcsel } ; This isn't a min or a max, but passes the first condition for swapping the diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll index d7d54a6e48a92..b2fd4821cc6eb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll @@ -5,7 +5,8 @@ define double @test_direct(float %in) { ; CHECK-LABEL: test_direct: ; CHECK: // %bb.0: ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fmaxnm s0, s0, s1 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: fcsel s0, s1, s0, lt ; CHECK-NEXT: fcvt d0, s0 ; CHECK-NEXT: ret %cmp = fcmp nnan olt float %in, 0.000000e+00 @@ -18,7 +19,8 @@ define double @test_cross(float %in) { ; CHECK-LABEL: test_cross: ; CHECK: // %bb.0: ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: fcsel s0, s0, s1, lt ; CHECK-NEXT: fcvt d0, s0 ; CHECK-NEXT: ret %cmp = fcmp nnan ult float %in, 0.000000e+00 @@ -33,7 +35,8 @@ define double @test_cross_fail_nan(float %in) { ; CHECK-LABEL: test_cross_fail_nan: ; CHECK: // %bb.0: ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: fcsel s0, s0, s1, lt ; CHECK-NEXT: fcvt d0, s0 ; CHECK-NEXT: ret %cmp = fcmp nnan olt float %in, 0.000000e+00 diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll index 92d8676ca04be..938217180676f 100644 --- a/llvm/test/CodeGen/AArch64/select_fmf.ll +++ b/llvm/test/CodeGen/AArch64/select_fmf.ll @@ -7,13 +7,14 @@ define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: select_select_fold_select_and: ; CHECK: // %bb.0: -; CHECK-NEXT: fminnm s4, s1, s2 +; CHECK-NEXT: fcmp s0, s3 +; CHECK-NEXT: fcsel s4, s0, s3, gt ; CHECK-NEXT: fcmp s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s3 -; CHECK-NEXT: fmov s1, #0.50000000 -; CHECK-NEXT: fccmp s4, s0, #4, lt -; CHECK-NEXT: fadd s1, s0, s1 -; CHECK-NEXT: fcsel s2, s2, s0, gt +; CHECK-NEXT: fcsel s1, s1, s2, lt +; CHECK-NEXT: fmov s2, #0.50000000 +; CHECK-NEXT: fccmp s1, s0, #4, lt +; CHECK-NEXT: fadd s1, s0, s2 +; CHECK-NEXT: fcsel s2, s4, s0, gt ; CHECK-NEXT: fadd s4, s1, s2 ; CHECK-NEXT: fcmp s4, s1 ; CHECK-NEXT: b.le .LBB0_2 @@ -65,13 +66,13 @@ exit: ; preds = %if.end.i159.i.i, %if.then.i define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: select_select_fold_select_or: ; CHECK: // %bb.0: -; CHECK-NEXT: fminnm s4, s1, s2 ; CHECK-NEXT: fcmp s1, s2 -; CHECK-NEXT: fmaxnm s2, s0, s3 -; CHECK-NEXT: fmov s1, #0.50000000 -; CHECK-NEXT: fccmp s4, s0, #0, ge -; CHECK-NEXT: fadd s1, s0, s1 -; CHECK-NEXT: fcsel s2, s0, s2, gt +; CHECK-NEXT: fcsel s1, s1, s2, lt +; CHECK-NEXT: fccmp s0, s3, #0, ge +; CHECK-NEXT: fmov s2, #0.50000000 +; CHECK-NEXT: fccmp s1, s0, #0, le +; CHECK-NEXT: fadd s1, s0, s2 +; CHECK-NEXT: fcsel s2, s0, s3, gt ; CHECK-NEXT: fadd s4, s1, s2 ; CHECK-NEXT: fcmp s4, s1 ; CHECK-NEXT: b.le .LBB1_2 diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll index 8438e9d88f5de..6c3ff79f911bc 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll @@ -659,9 +659,10 @@ define <vscale x 4 x float> @fcmp_fast_olt_v4f32(<vscale x 4 x float> %z, <vscal ; CHECK-LABEL: fcmp_fast_olt_v4f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: mov z0.s, p1/m, z1.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z2.s, z1.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s +; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret entry: %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer @@ -675,9 +676,10 @@ define <vscale x 8 x half> @fcmp_fast_olt_v8f16(<vscale x 8 x half> %z, <vscale ; CHECK-LABEL: fcmp_fast_olt_v8f16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: mov z0.h, p1/m, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h +; CHECK-NEXT: mov z0.h, p0/m, z1.h ; CHECK-NEXT: ret entry: %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer @@ -691,9 +693,10 @@ define <vscale x 4 x float> @fcmp_fast_ogt_v4f32(<vscale x 4 x float> %z, <vscal ; CHECK-LABEL: fcmp_fast_ogt_v4f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: mov z0.s, p1/m, z1.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s +; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret entry: %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer @@ -707,9 +710,10 @@ define <vscale x 8 x half> @fcmp_fast_ogt_v8f16(<vscale x 8 x half> %z, <vscale ; CHECK-LABEL: fcmp_fast_ogt_v8f16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: mov z0.h, p1/m, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h +; CHECK-NEXT: mov z0.h, p0/m, z1.h ; CHECK-NEXT: ret entry: %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll index a2ff0d33e2d31..9e8ffb0104340 100644 --- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll @@ -451,3 +451,29 @@ define signext i32 @select_fcmp_uge_1_2(float %a, float %b) nounwind { %2 = select i1 %1, i32 1, i32 2 ret i32 %2 } + +; Test from PR93414 +; Make sure that we don't use fmin.s here to handle signed zero correctly. +define float @select_fcmp_olt_pos_zero(float %x) { +; CHECK-LABEL: select_fcmp_olt_pos_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.w.x fa5, zero +; CHECK-NEXT: flt.s a0, fa0, fa5 +; CHECK-NEXT: bnez a0, .LBB20_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.s fa0, fa5 +; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: ret +; +; CHECKZFINX-LABEL: select_fcmp_olt_pos_zero: +; CHECKZFINX: # %bb.0: +; CHECKZFINX-NEXT: flt.s a1, a0, zero +; CHECKZFINX-NEXT: bnez a1, .LBB20_2 +; CHECKZFINX-NEXT: # %bb.1: +; CHECKZFINX-NEXT: li a0, 0 +; CHECKZFINX-NEXT: .LBB20_2: +; CHECKZFINX-NEXT: ret + %cmp = fcmp olt float %x, 0.000000 + %sel = select i1 %cmp, float %x, float 0.000000 + ret float %sel +} |
| if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) || | ||
| (UseScalarMinMax && | ||
| TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()))) | ||
| Opc = ISD::FMINNUM; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just noticed we don't actually have the DAG combine to form nnan select -> minimum/maximum. Ideally we would implement that before dropping this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just noticed we don't actually have the DAG combine to form nnan select -> minimum/maximum. Ideally we would implement that before dropping this
Are you working on this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not yet, I was considering it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you going to work on this? If not I can probably pick this up
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No.
|
| @arsenm Have all (potential) regressions been fixed? |
No. We at least should have the nnan cases to minimum/maximum combine handled |
These are only theoretical regressions and I don't think should hold this up. It would only be helpful for targets that 1. only have legal fminimum/fmaximum and no legal fmin/fmax, and also those patterns appear during legalization. I believe wasm is the only relevant target for 1 |
| @arsenm All assertion failures are fixed. Can I merge this PR? |
| ; GFX9-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1 | ||
| ; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0 | ||
| ; GFX9-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1 | ||
| ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This regression actually doesn't look good. I need to look at this, but running instcombine on the test first doesn't recover the v_med3_f32
As we already handle this pattern in InstCombine/DAGCombiner, I think it is ok to drop this folding in SelectionDAGBuilder to handle signed zero correctly.
Fixes #93414.