- Notifications
You must be signed in to change notification settings - Fork 15.3k
DAG: Implement promotion for strict_fpextend #74310
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
| @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: The test is a placeholder; it will be merged into the existing test after additional bug fixes for illegal f16 targets have landed. Full diff: https://github.com/llvm/llvm-project/pull/74310.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 630aa4a07d7b9..f77b3afccfb8f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2214,6 +2214,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT_SAT: R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break; case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break; + case ISD::STRICT_FP_EXTEND: + R = PromoteFloatOp_STRICT_FP_EXTEND(N, OpNo); + break; case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break; @@ -2276,6 +2279,26 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) { return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op); } +SDValue DAGTypeLegalizer::PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1); + + SDValue Op = GetPromotedFloat(N->getOperand(1)); + EVT VT = N->getValueType(0); + + // Desired VT is same as promoted type. Use promoted float directly. + if (VT == Op->getValueType(0)) { + ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + return Op; + } + + // Else, extend the promoted float value to the desired VT. + SDValue Res = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), N->getVTList(), + N->getOperand(0), Op); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + // Promote the float operands used for comparison. 
The true- and false- // operands have the same type as the result and are promoted, if needed, by // PromoteFloatRes_SELECT_CC diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e9bd54089d062..4c7ddd4aea9e6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -712,6 +712,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); diff --git a/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll b/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll new file mode 100644 index 0000000000000..a74f6bfd564bf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX8 %s + +declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1 +declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1 + +define float @v_constrained_fpext_f16_to_f32(ptr addrspace(1) %ptr) #0 { +; GFX8-LABEL: v_constrained_fpext_f16_to_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s6, 0 +; GFX8-NEXT: s_mov_b32 s7, 0xf000 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s6 +; GFX8-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %val = load half, 
ptr addrspace(1) %ptr + %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %val, metadata !"fpexcept.strict") + ret float %result +} + +define <2 x float> @v_constrained_fpext_v2f16_to_v2f32(ptr addrspace(1) %ptr) #0 { +; GFX8-LABEL: v_constrained_fpext_v2f16_to_v2f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s6, 0 +; GFX8-NEXT: s_mov_b32 s7, 0xf000 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s6 +; GFX8-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %val = load <2 x half>, ptr addrspace(1) %ptr + %result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %val, metadata !"fpexcept.strict") + ret <2 x float> %result +} + +attributes #0 = { strictfp } |
| @llvm/pr-subscribers-llvm-selectiondag Author: Matt Arsenault (arsenm) Changes: The test is a placeholder; it will be merged into the existing test after additional bug fixes for illegal f16 targets have landed. Full diff: https://github.com/llvm/llvm-project/pull/74310.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 630aa4a07d7b9..f77b3afccfb8f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2214,6 +2214,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT_SAT: R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break; case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break; + case ISD::STRICT_FP_EXTEND: + R = PromoteFloatOp_STRICT_FP_EXTEND(N, OpNo); + break; case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break; @@ -2276,6 +2279,26 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) { return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op); } +SDValue DAGTypeLegalizer::PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1); + + SDValue Op = GetPromotedFloat(N->getOperand(1)); + EVT VT = N->getValueType(0); + + // Desired VT is same as promoted type. Use promoted float directly. + if (VT == Op->getValueType(0)) { + ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + return Op; + } + + // Else, extend the promoted float value to the desired VT. + SDValue Res = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), N->getVTList(), + N->getOperand(0), Op); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + // Promote the float operands used for comparison. 
The true- and false- // operands have the same type as the result and are promoted, if needed, by // PromoteFloatRes_SELECT_CC diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e9bd54089d062..4c7ddd4aea9e6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -712,6 +712,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); diff --git a/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll b/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll new file mode 100644 index 0000000000000..a74f6bfd564bf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX8 %s + +declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1 +declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1 + +define float @v_constrained_fpext_f16_to_f32(ptr addrspace(1) %ptr) #0 { +; GFX8-LABEL: v_constrained_fpext_f16_to_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s6, 0 +; GFX8-NEXT: s_mov_b32 s7, 0xf000 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s6 +; GFX8-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %val = load half, 
ptr addrspace(1) %ptr + %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %val, metadata !"fpexcept.strict") + ret float %result +} + +define <2 x float> @v_constrained_fpext_v2f16_to_v2f32(ptr addrspace(1) %ptr) #0 { +; GFX8-LABEL: v_constrained_fpext_v2f16_to_v2f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s6, 0 +; GFX8-NEXT: s_mov_b32 s7, 0xf000 +; GFX8-NEXT: s_mov_b32 s4, s6 +; GFX8-NEXT: s_mov_b32 s5, s6 +; GFX8-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] + %val = load <2 x half>, ptr addrspace(1) %ptr + %result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %val, metadata !"fpexcept.strict") + ret <2 x float> %result +} + +attributes #0 = { strictfp } |
The test is a placeholder; it will be merged into the existing test after additional bug fixes for illegal f16 targets have landed.
89180d7 to 405b870 Compare | ping |
spavloff left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
| return Op; | ||
| } | ||
| | ||
| // Else, extend the promoted float value to the desired VT. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code path is not represented in the tests, is it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it is; it would require promoting past a legal FP type to another, which is probably an unusual situation.
The test is a placeholder; it will be merged into the existing test after additional bug fixes for illegal f16 targets have landed.