[RISCV] Add codegen support for experimental.vp.splice #74688
Conversation
IR intrinsics were already defined, but no codegen support had been added. I extracted this code from our downstream. Some of it may have come from https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi/ originally.
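For readers new to the intrinsic, the semantics this patch implements can be modeled in a few lines of scalar code: the first `evl1` elements of the first vector are concatenated with the first `evl2` elements of the second, and the result is the `evl2`-element window starting at the offset (counted back from the end of the first effective vector when the offset is negative). Below is a minimal sketch under those assumptions, with the mask operand omitted for brevity; the function name and the use of `std::vector` are illustrative only, not LLVM API:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Scalar model of vp.splice: concatenate the effective parts of Va and Vb,
// then take Evl2 consecutive elements starting at Imm (Imm >= 0) or at
// Evl1 + Imm (Imm < 0). Mask handling is intentionally left out.
std::vector<int64_t> vpSpliceModel(const std::vector<int64_t> &Va,
                                   const std::vector<int64_t> &Vb, int64_t Imm,
                                   size_t Evl1, size_t Evl2) {
  assert(Imm >= -(int64_t)Evl1 && Imm <= (int64_t)Evl1 &&
         "offset out of range");
  // Concatenate the first Evl1 elements of Va with the first Evl2 of Vb.
  std::vector<int64_t> Concat(Va.begin(), Va.begin() + Evl1);
  Concat.insert(Concat.end(), Vb.begin(), Vb.begin() + Evl2);
  // A negative offset counts back from the end of Va's effective part.
  size_t Start = Imm >= 0 ? (size_t)Imm : Evl1 - (size_t)(-Imm);
  return std::vector<int64_t>(Concat.begin() + Start,
                              Concat.begin() + Start + Evl2);
}
```

For example, with `Evl1 = 8`, `Evl2 = 4`, and `Imm = 5`, the result is `Va[5], Va[6], Va[7], Vb[0]`.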
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

IR intrinsics were already defined, but no codegen support had been added. I extracted this code from our downstream. Some of it may have come from https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi/ originally.

Patch is 80.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74688.diff

8 Files Affected:
```diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 362fa92dd44b2a..3d21bd22e6ef5d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1871,6 +1871,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
     Res = PromoteIntOp_VP_STRIDED(N, OpNo);
     break;
+  case ISD::EXPERIMENTAL_VP_SPLICE:
+    Res = PromoteIntOp_VP_SPLICE(N, OpNo);
+    break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.
@@ -2549,6 +2552,20 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
   return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo) {
+  SmallVector<SDValue, 6> NewOps(N->op_begin(), N->op_end());
+
+  if (OpNo == 2) { // Offset operand
+    NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+    return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+  }
+
+  assert((OpNo == 4 || OpNo == 5) && "Unexpected operand for promotion");
+
+  NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
+  return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
 //===----------------------------------------------------------------------===//
 //  Integer Result Expansion
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e9bd54089d0627..4e8c29b1d031a1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -410,6 +410,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
+  SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo);
 
   void PromoteSetCCOperands(SDValue &LHS, SDValue &RHS, ISD::CondCode Code);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f2ec422b54a926..8a9ff6c418b109 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -667,7 +667,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
       ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
       ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
-      ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE};
+      ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
 
   static const unsigned FloatingPointVPOps[] = {
       ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
@@ -680,7 +680,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
       ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
       ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
-      ISD::EXPERIMENTAL_VP_REVERSE};
+      ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
 
   static const unsigned IntegerVecReduceOps[] = {
       ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -765,6 +765,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
 
+      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
       setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
 
       setOperationPromotedToType(
@@ -1139,6 +1140,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                             ISD::VP_SETCC, ISD::VP_TRUNCATE},
                            VT, Custom);
 
+        setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
         setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
         continue;
       }
@@ -6605,6 +6607,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
         !Subtarget.hasVInstructionsF16()))
       return SplitVPOp(Op, DAG);
     return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
+  case ISD::EXPERIMENTAL_VP_SPLICE:
+    return lowerVPSpliceExperimental(Op, DAG);
   case ISD::EXPERIMENTAL_VP_REVERSE:
     return lowerVPReverseExperimental(Op, DAG);
   }
@@ -10531,6 +10535,87 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
   return convertFromScalableVector(VT, Result, DAG, Subtarget);
 }
 
+SDValue
+RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+
+  SDValue Op1 = Op.getOperand(0);
+  SDValue Op2 = Op.getOperand(1);
+  SDValue Offset = Op.getOperand(2);
+  SDValue Mask = Op.getOperand(3);
+  SDValue EVL1 = Op.getOperand(4);
+  SDValue EVL2 = Op.getOperand(5);
+
+  const MVT XLenVT = Subtarget.getXLenVT();
+  MVT VT = Op.getSimpleValueType();
+  MVT ContainerVT = VT;
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(VT);
+    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
+    MVT MaskVT = getMaskTypeFor(ContainerVT);
+    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+  }
+
+  bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
+  if (IsMaskVector) {
+    ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
+
+    // Expand input operands
+    SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+                                      DAG.getUNDEF(ContainerVT),
+                                      DAG.getConstant(1, DL, XLenVT), EVL1);
+    SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+                                       DAG.getUNDEF(ContainerVT),
+                                       DAG.getConstant(0, DL, XLenVT), EVL1);
+    Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op1, SplatOneOp1,
+                      SplatZeroOp1, EVL1);
+
+    SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+                                      DAG.getUNDEF(ContainerVT),
+                                      DAG.getConstant(1, DL, XLenVT), EVL2);
+    SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+                                       DAG.getUNDEF(ContainerVT),
+                                       DAG.getConstant(0, DL, XLenVT), EVL2);
+    Op2 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op2, SplatOneOp2,
+                      SplatZeroOp2, EVL2);
+  }
+
+  int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
+  SDValue DownOffset, UpOffset;
+  if (ImmValue >= 0) {
+    // The operand is a TargetConstant, we need to rebuild it as a regular
+    // constant.
+    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
+    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
+  } else {
+    // The operand is a TargetConstant, we need to rebuild it as a regular
+    // constant rather than negating the original operand.
+    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
+    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
+  }
+
+  SDValue SlideDown =
+      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
+                    Op1, DownOffset, Mask, UpOffset);
+  SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
+                               UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
+
+  if (IsMaskVector) {
+    // Truncate Result back to a mask vector (Result has same EVL as Op2)
+    Result = DAG.getNode(
+        RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
+        {Result, DAG.getConstant(0, DL, ContainerVT),
+         DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
+         Mask, EVL2});
+  }
+
+  if (!VT.isFixedLengthVector())
+    return Result;
+  return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
+
 SDValue
 RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
                                                 SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ae798cc47bf833..6ae2a8a9fbd8cf 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -904,6 +904,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/vp-splice-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/vp-splice-fixed-vectors.ll
new file mode 100644
index 00000000000000..f7c8c251e197be
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/vp-splice-fixed-vectors.ll
@@ -0,0 +1,281 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 \
+; RUN:   < %s | FileCheck %s
+
+declare <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64>, <2 x i64>, i32, <2 x i1>, i32, i32)
+declare <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32>, <4 x i32>, i32, <4 x i1>, i32, i32)
+declare <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16>, <8 x i16>, i32, <8 x i1>, i32, i32)
+declare <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8>, <16 x i8>, i32, <16 x i1>, i32, i32)
+
+declare <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double>, <2 x double>, i32, <2 x i1>, i32, i32)
+declare <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float>, <4 x float>, i32, <4 x i1>, i32, i32)
+
+define <2 x i64> @test_vp_splice_v2i64(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+
+  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> %allones, i32 %evla, i32 %evlb)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @test_vp_splice_v2i64_negative_offset(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v2i64_negative_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetivli zero, 5, e64, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 5
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+
+  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 -5, <2 x i1> %allones, i32 %evla, i32 %evlb)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @test_vp_splice_v2i64_masked(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v2i64_masked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> %mask, i32 %evla, i32 %evlb)
+  ret <2 x i64> %v
+}
+
+define <4 x i32> @test_vp_splice_v4i32(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+
+  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> %allones, i32 %evla, i32 %evlb)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @test_vp_splice_v4i32_negative_offset(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v4i32_negative_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetivli zero, 5, e32, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 5
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+
+  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 -5, <4 x i1> %allones, i32 %evla, i32 %evlb)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @test_vp_splice_v4i32_masked(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v4i32_masked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> %mask, i32 %evla, i32 %evlb)
+  ret <4 x i32> %v
+}
+
+define <8 x i16> @test_vp_splice_v8i16(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+
+  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> %allones, i32 %evla, i32 %evlb)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @test_vp_splice_v8i16_negative_offset(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v8i16_negative_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetivli zero, 5, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 5
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+
+  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 -5, <8 x i1> %allones, i32 %evla, i32 %evlb)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @test_vp_splice_v8i16_masked(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v8i16_masked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb)
+  ret <8 x i16> %v
+}
+
+define <16 x i8> @test_vp_splice_v16i8(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+
+  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> %allones, i32 %evla, i32 %evlb)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @test_vp_splice_v16i8_negative_offset(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v16i8_negative_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetivli zero, 5, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 5
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+
+  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 -5, <16 x i1> %allones, i32 %evla, i32 %evlb)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @test_vp_splice_v16i8_masked(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v16i8_masked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> %mask, i32 %evla, i32 %evlb)
+  ret <16 x i8> %v
+}
+
+define <2 x double> @test_vp_splice_v2f64(<2 x double> %va, <2 x double> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+
+  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 5, <2 x i1> %allones, i32 %evla, i32 %evlb)
+  ret <2 x double> %v
+}
+
+define <2 x double> @test_vp_splice_v2f64_negative_offset(<2 x double> %va, <2 x double> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v2f64_negative_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetivli zero, 5, e64, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 5
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+
+  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 -5, <2 x i1> %allones, i32 %evla, i32 %evlb)
+  ret <2 x double> %v
+}
+
+define <2 x double> @test_vp_splice_v2f64_masked(<2 x double> %va, <2 x double> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v2f64_masked:
+; CHECK:       # %bb.0:
+; CHECK-NEX...
[truncated]
```
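Stepping back from the diff: the lowering reduces a splice to one `vslidedown` on the first source followed by one tail-agnostic `vslideup` of the second, and the only arithmetic involved is splitting the immediate offset into those two slide amounts. Here is a standalone sketch of that computation (plain C++ mirroring the logic of `lowerVPSpliceExperimental` above; an illustration, not SelectionDAG code):

```cpp
#include <cstdint>
#include <utility>

// Split the splice offset into the vslidedown amount applied to the first
// source and the vslideup amount (insertion index) for the second source.
// Mirrors the DownOffset/UpOffset computation in lowerVPSpliceExperimental.
std::pair<int64_t, int64_t> spliceSlideAmounts(int64_t Imm, int64_t Evl1) {
  if (Imm >= 0) {
    // Positive offset: drop the first Imm elements of op1; op2 starts right
    // after the Evl1 - Imm elements that survive.
    return {Imm, Evl1 - Imm};
  }
  // Negative offset: keep the last -Imm elements of op1 by sliding them to
  // the front, then insert op2 at index -Imm.
  return {Evl1 - (-Imm), -Imm};
}
```

With `Imm = 5` this yields `DownOffset = 5` and `UpOffset = Evl1 - 5`, which is exactly the `addi a0, a0, -5` / `vslidedown.vi` / `vslideup.vx` sequence in the CHECK lines above. Mask vectors take a detour because the slides are not defined on i1 elements: the lowering widens them to i8 by selecting between splats of 1 and 0, splices, then narrows back with a `SETCC_VL` compare against zero. The type-legalizer half of the patch (`PromoteIntOp_VP_SPLICE`) sign-extends the offset operand and zero-extends both EVL operands when they are promoted.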
You can test this locally with the following command:

```bash
git-clang-format --diff c6dc9cd1fbfcb47aa193f16cb02b97876643e1fe a96c135421d71b5a618397afc8350a935e6cb768 -- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/lib/Target/RISCV/RISCVISelLowering.h
```

View the diff from clang-format here.

```diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 730b4c5098..92404a6e58 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -655,32 +655,75 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, MVT::Other,
                      Custom);
 
-  static const unsigned IntegerVPOps[] = {
-      ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
-      ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
-      ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
-      ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
-      ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
-      ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
-      ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
-      ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
-      ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
-      ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
-      ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
-      ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
-
-  static const unsigned FloatingPointVPOps[] = {
-      ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
-      ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
-      ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
-      ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
-      ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
-      ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
-      ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
-      ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
-      ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
-      ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
-      ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
+  static const unsigned IntegerVPOps[] = {ISD::VP_ADD,
+                                          ISD::VP_SUB,
+                                          ISD::VP_MUL,
+                                          ISD::VP_SDIV,
+                                          ISD::VP_UDIV,
+                                          ISD::VP_SREM,
+                                          ISD::VP_UREM,
+                                          ISD::VP_AND,
+                                          ISD::VP_OR,
+                                          ISD::VP_XOR,
+                                          ISD::VP_ASHR,
+                                          ISD::VP_LSHR,
+                                          ISD::VP_SHL,
+                                          ISD::VP_REDUCE_ADD,
+                                          ISD::VP_REDUCE_AND,
+                                          ISD::VP_REDUCE_OR,
+                                          ISD::VP_REDUCE_XOR,
+                                          ISD::VP_REDUCE_SMAX,
+                                          ISD::VP_REDUCE_SMIN,
+                                          ISD::VP_REDUCE_UMAX,
+                                          ISD::VP_REDUCE_UMIN,
+                                          ISD::VP_MERGE,
+                                          ISD::VP_SELECT,
+                                          ISD::VP_FP_TO_SINT,
+                                          ISD::VP_FP_TO_UINT,
+                                          ISD::VP_SETCC,
+                                          ISD::VP_SIGN_EXTEND,
+                                          ISD::VP_ZERO_EXTEND,
+                                          ISD::VP_TRUNCATE,
+                                          ISD::VP_SMIN,
+                                          ISD::VP_SMAX,
+                                          ISD::VP_UMIN,
+                                          ISD::VP_UMAX,
+                                          ISD::VP_ABS,
+                                          ISD::EXPERIMENTAL_VP_REVERSE,
+                                          ISD::EXPERIMENTAL_VP_SPLICE};
+
+  static const unsigned FloatingPointVPOps[] = {ISD::VP_FADD,
+                                                ISD::VP_FSUB,
+                                                ISD::VP_FMUL,
+                                                ISD::VP_FDIV,
+                                                ISD::VP_FNEG,
+                                                ISD::VP_FABS,
+                                                ISD::VP_FMA,
+                                                ISD::VP_REDUCE_FADD,
+                                                ISD::VP_REDUCE_SEQ_FADD,
+                                                ISD::VP_REDUCE_FMIN,
+                                                ISD::VP_REDUCE_FMAX,
+                                                ISD::VP_MERGE,
+                                                ISD::VP_SELECT,
+                                                ISD::VP_SINT_TO_FP,
+                                                ISD::VP_UINT_TO_FP,
+                                                ISD::VP_SETCC,
+                                                ISD::VP_FP_ROUND,
+                                                ISD::VP_FP_EXTEND,
+                                                ISD::VP_SQRT,
+                                                ISD::VP_FMINNUM,
+                                                ISD::VP_FMAXNUM,
+                                                ISD::VP_FCEIL,
+                                                ISD::VP_FFLOOR,
+                                                ISD::VP_FROUND,
+                                                ISD::VP_FROUNDEVEN,
+                                                ISD::VP_FCOPYSIGN,
+                                                ISD::VP_FROUNDTOZERO,
+                                                ISD::VP_FRINT,
+                                                ISD::VP_FNEARBYINT,
+                                                ISD::VP_IS_FPCLASS,
+                                                ISD::EXPERIMENTAL_VP_REVERSE,
+                                                ISD::EXPERIMENTAL_VP_SPLICE};
 
   static const unsigned IntegerVecReduceOps[] = {
       ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
```
Ping
rofirrim left a comment
LGTM. Thanks @topperc!