Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 2 additions & 26 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3734,32 +3734,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_UMAX: Opc = ISD::UMAX; break;
case SPF_UMIN: Opc = ISD::UMIN; break;
case SPF_SMAX: Opc = ISD::SMAX; break;
case SPF_SMIN: Opc = ISD::SMIN; break;
case SPF_FMINNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType())))
Opc = ISD::FMINNUM;
Comment on lines -3744 to -3747
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just noticed we don't actually have the DAG combine to form nnan select -> minimum/maximum. Ideally, we would implement that before dropping this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just noticed we don't actually have the DAG combine to form nnan select -> minimum/maximum. Ideally, we would implement that before dropping this.

Are you working on this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not yet; I was considering it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you going to work on this? If not, I can probably pick this up.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No.

break;
}
break;
case SPF_FMAXNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType())))
Opc = ISD::FMAXNUM;
break;
}
case SPF_SMIN:
Opc = ISD::SMIN;
break;
case SPF_NABS:
Negate = true;
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,16 @@ define double @test_cross(float %in) {
}

; Same as previous, but with ordered comparison;
; must become fminnm, not fmin.
; must become fcmp + fcsel, not fmin/fminnm.
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
%cmp = fcmp olt float %in, 0.000000e+00
%val = select i1 %cmp, float %in, float 0.000000e+00
%longer = fpext float %val to double
ret double %longer

; CHECK: fminnm s
; CHECK: fcmp
; CHECK: fcsel
}

; This isn't a min or a max, but passes the first condition for swapping the
Expand Down
9 changes: 6 additions & 3 deletions llvm/test/CodeGen/AArch64/arm64-fmax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ define double @test_direct(float %in) {
; CHECK-LABEL: test_direct:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fcsel s0, s1, s0, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan olt float %in, 0.000000e+00
Expand All @@ -18,7 +19,8 @@ define double @test_cross(float %in) {
; CHECK-LABEL: test_cross:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fminnm s0, s0, s1
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fcsel s0, s0, s1, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan ult float %in, 0.000000e+00
Expand All @@ -33,7 +35,8 @@ define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fminnm s0, s0, s1
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fcsel s0, s0, s1, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan olt float %in, 0.000000e+00
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AArch64/select_fmf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) {
; CHECK-LABEL: select_select_fold_select_and:
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fmov s4, #0.50000000
; CHECK-NEXT: fcsel s1, s1, s2, lt
; CHECK-NEXT: fmaxnm s2, s0, s3
; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fccmp s4, s0, #4, lt
; CHECK-NEXT: fadd s1, s0, s1
; CHECK-NEXT: fccmp s1, s0, #4, lt
; CHECK-NEXT: fadd s1, s0, s4
; CHECK-NEXT: fcsel s2, s2, s0, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
Expand Down Expand Up @@ -65,13 +65,13 @@ exit: ; preds = %if.end.i159.i.i, %if.then.i
define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) {
; CHECK-LABEL: select_select_fold_select_or:
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fmaxnm s2, s0, s3
; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fccmp s4, s0, #0, ge
; CHECK-NEXT: fadd s1, s0, s1
; CHECK-NEXT: fcsel s2, s0, s2, gt
; CHECK-NEXT: fcsel s1, s1, s2, lt
; CHECK-NEXT: fccmp s0, s3, #0, ge
; CHECK-NEXT: fmov s2, #0.50000000
; CHECK-NEXT: fccmp s1, s0, #0, le
; CHECK-NEXT: fadd s1, s0, s2
; CHECK-NEXT: fcsel s2, s0, s3, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB1_2
Expand Down
28 changes: 16 additions & 12 deletions llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -659,9 +659,10 @@ define <vscale x 4 x float> @fcmp_fast_olt_v4f32(<vscale x 4 x float> %z, <vscal
; CHECK-LABEL: fcmp_fast_olt_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: fcmgt p1.s, p0/z, z2.s, z1.s
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
Expand All @@ -675,9 +676,10 @@ define <vscale x 8 x half> @fcmp_fast_olt_v8f16(<vscale x 8 x half> %z, <vscale
; CHECK-LABEL: fcmp_fast_olt_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z1.h
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
Expand All @@ -691,9 +693,10 @@ define <vscale x 4 x float> @fcmp_fast_ogt_v4f32(<vscale x 4 x float> %z, <vscal
; CHECK-LABEL: fcmp_fast_ogt_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
Expand All @@ -707,9 +710,10 @@ define <vscale x 8 x half> @fcmp_fast_ogt_v8f16(<vscale x 8 x half> %z, <vscale
; CHECK-LABEL: fcmp_fast_ogt_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
Expand Down
17 changes: 13 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fmed3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1016,7 +1016,10 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v3
; VI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v2
; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 2.0, v2, vcc
; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v2
; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 4.0, v2, vcc
; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
; VI-SDAG-NEXT: s_endpgm
;
Expand Down Expand Up @@ -1051,7 +1054,10 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1
; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
; GFX9-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This regression actually doesn't look good. I need to look at this, but running instcombine on the test first doesn't recover the v_med3_f32.

; GFX9-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v1
; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-SDAG-NEXT: s_endpgm
;
Expand All @@ -1078,8 +1084,11 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 2.0, v1
; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc_lo
; GFX11-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 4.0, v1
; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand Down
Loading