llvm
diff --git a/‎llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp‎
Lines changed: 7 additions & 1 deletion b/‎llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎llvm/lib/Target/RISCV/RISCVISelLowering.cpp‎
Lines changed: 99 additions & 31 deletions b/‎llvm/lib/Target/RISCV/RISCVISelLowering.cpp‎
Lines changed: 99 additions & 31 deletions
@@ -2063,8 +2063,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
  MVT SubVecContainerVT = SubVecVT;
  // Establish the correct scalable-vector types for any fixed-length type.
  if (SubVecVT.isFixedLengthVector()) {
- assert(Idx == 0 && V.isUndef());
  SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
+ bool AlignedToVecReg = false;
+ if (auto VLen = Subtarget->getRealVLen();
+ VLen && SubVecVT.getSizeInBits() ==
+ SubVecContainerVT.getSizeInBits().getKnownMinValue() *
+ (*VLen / RISCV::RVVBitsPerBlock))
+ AlignedToVecReg = true;
+ assert(Idx == 0 && (AlignedToVecReg || V.isUndef()));
  }
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector())
 
@@ -9596,6 +9596,21 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
  Vec, Mask, VL, DL, DAG, Subtarget);
 }
 
+/// Returns true if \p LHS is known to be equal to \p RHS, taking into account
+/// if VLEN is exactly known by \p Subtarget and thus vscale when handling
+/// scalable quantities.
+static bool isKnownEQ(ElementCount LHS, ElementCount RHS,
+ const RISCVSubtarget &Subtarget) {
+ if (auto VLen = Subtarget.getRealVLen()) {
+ const unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
+ if (LHS.isScalable())
+ LHS = ElementCount::getFixed(LHS.getKnownMinValue() * Vscale);
+ if (RHS.isScalable())
+ RHS = ElementCount::getFixed(RHS.getKnownMinValue() * Vscale);
+ }
+ return LHS == RHS;
+}
+
 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
  SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
@@ -9645,12 +9660,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
  }
  }
 
- // If the subvector vector is a fixed-length type, we cannot use subregister
- // manipulation to simplify the codegen; we don't know which register of a
- // LMUL group contains the specific subvector as we only know the minimum
- // register size. Therefore we must slide the vector group up the full
- // amount.
- if (SubVecVT.isFixedLengthVector()) {
+ // If the subvector vector is a fixed-length type and we don't know VLEN
+ // exactly, we cannot use subregister manipulation to simplify the codegen; we
+ // don't know which register of a LMUL group contains the specific subvector
+ // as we only know the minimum register size. Therefore we must slide the
+ // vector group up the full amount.
+ const auto VLen = Subtarget.getRealVLen();
+ if (SubVecVT.isFixedLengthVector() && !VLen) {
  if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
  return Op;
  MVT ContainerVT = VecVT;
@@ -9698,41 +9714,92 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
  return DAG.getBitcast(Op.getValueType(), SubVec);
  }
 
- unsigned SubRegIdx, RemIdx;
- std::tie(SubRegIdx, RemIdx) =
- RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
- VecVT, SubVecVT, OrigIdx, TRI);
+ MVT ContainerVecVT = VecVT;
+ if (VecVT.isFixedLengthVector()) {
+ ContainerVecVT = getContainerForFixedLengthVector(VecVT);
+ Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
+ }
 
- RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
+ MVT ContainerSubVecVT = SubVecVT;
+ if (SubVecVT.isFixedLengthVector()) {
+ ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
+ SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
+ }
+
+ unsigned SubRegIdx;
+ ElementCount RemIdx;
+ // insert_subvector scales the index by vscale if the subvector is scalable,
+ // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
+ // we have a fixed length subvector, we need to adjust the index by 1/vscale.
+ if (SubVecVT.isFixedLengthVector()) {
+ assert(VLen);
+ unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
+ auto Decompose =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
+ SubRegIdx = Decompose.first;
+ RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
+ (OrigIdx % Vscale));
+ } else {
+ auto Decompose =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
+ SubRegIdx = Decompose.first;
+ RemIdx = ElementCount::getScalable(Decompose.second);
+ }
+
+ RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(ContainerSubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
  SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
  SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
+ bool AlignedToVecReg = !IsSubVecPartReg;
+ if (SubVecVT.isFixedLengthVector())
+ AlignedToVecReg &= SubVecVT.getSizeInBits() ==
+ ContainerSubVecVT.getSizeInBits().getKnownMinValue() *
+ (*VLen / RISCV::RVVBitsPerBlock);
 
  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
- // 2. If the subvector is smaller than a vector register, then the insertion
- // must preserve the undisturbed elements of the register. We do this by
- // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
- // (which resolves to a subregister copy), performing a VSLIDEUP to place the
- // subvector within the vector register, and an INSERT_SUBVECTOR of that
+ // 2. If the subvector isn't exactly aligned to a vector register group, then
+ // the insertion must preserve the undisturbed elements of the register. We do
+ // this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector
+ // type (which resolves to a subregister copy), performing a VSLIDEUP to place
+ // the subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
- if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
+ if (RemIdx.isZero() && (AlignedToVecReg || Vec.isUndef())) {
+ if (SubVecVT.isFixedLengthVector()) {
+ // We may get NoSubRegister if inserting at index 0 and the subvec
+ // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
+ if (SubRegIdx == RISCV::NoSubRegister) {
+ assert(OrigIdx == 0);
+ return Op;
+ }
+
+ SDValue Insert =
+ DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
+ if (VecVT.isFixedLengthVector())
+ Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
+ return Insert;
+ }
  return Op;
+ }
 
  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
- MVT InterSubVT = VecVT;
+ MVT InterSubVT = ContainerVecVT;
  SDValue AlignedExtract = Vec;
- unsigned AlignedIdx = OrigIdx - RemIdx;
- if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
- InterSubVT = getLMUL1VT(VecVT);
+ unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
+ if (SubVecVT.isFixedLengthVector())
+ AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
+ if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
+ InterSubVT = getLMUL1VT(ContainerVecVT);
  // Extract a subvector equal to the nearest full vector register type. This
  // should resolve to a EXTRACT_SUBREG instruction.
  AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
@@ -9743,25 +9810,23 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
  DAG.getUNDEF(InterSubVT), SubVec,
  DAG.getVectorIdxConstant(0, DL));
 
- auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
+ auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
 
- ElementCount EndIndex =
- ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
- VL = computeVLMax(SubVecVT, DL, DAG);
+ ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
+ VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
 
  // Use tail agnostic policy if we're inserting over InterSubVT's tail.
  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
- if (EndIndex == InterSubVT.getVectorElementCount())
+ if (isKnownEQ(EndIndex, InterSubVT.getVectorElementCount(), Subtarget))
  Policy = RISCVII::TAIL_AGNOSTIC;
 
  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
- if (RemIdx == 0) {
+ if (RemIdx.isZero()) {
  SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
  SubVec, VL);
  } else {
- SDValue SlideupAmt =
- DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
+ SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
 
  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
@@ -9772,10 +9837,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
 
  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
- if (VecVT.bitsGT(InterSubVT))
- SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
+ if (ContainerVecVT.bitsGT(InterSubVT))
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
  DAG.getVectorIdxConstant(AlignedIdx, DL));
 
+ if (VecVT.isFixedLengthVector())
+ SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
+
  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), SubVec);