Merged
12 changes: 12 additions & 0 deletions llvm/include/llvm/Support/TypeSize.h
@@ -181,6 +181,18 @@ template <typename LeafTy, typename ValueTy> class FixedOrScalableQuantity {
return getKnownMinValue() % RHS == 0;
}

/// Returns whether or not this quantity is known to be a multiple of RHS.
constexpr bool isKnownMultipleOf(const FixedOrScalableQuantity &RHS) const {
// x % y == 0 => x % y == 0
// x % y == 0 => (vscale * x) % y == 0
// x % y == 0 => (vscale * x) % (vscale * y) == 0
// but
// x % y == 0 !=> x % (vscale * y) == 0
if (!isScalable() && RHS.isScalable())
return false;
return getKnownMinValue() % RHS.getKnownMinValue() == 0;
}

// Return the minimum value with the assumption that the count is exact.
// Use in places where a scalable count doesn't make sense (e.g. non-vector
// types, or vectors in backends which don't support scalable vectors).
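As a quick editorial illustration (not part of the patch), the new predicate's four cases play out exactly as the comment describes, and can be checked at compile time since the constructors and the predicate are constexpr:

#include "llvm/Support/TypeSize.h"
using namespace llvm;

static_assert(TypeSize::getFixed(8).isKnownMultipleOf(TypeSize::getFixed(4)));       // 8 % 4 == 0
static_assert(TypeSize::getScalable(8).isKnownMultipleOf(TypeSize::getFixed(4)));    // (vscale * 8) % 4 == 0
static_assert(TypeSize::getScalable(8).isKnownMultipleOf(TypeSize::getScalable(4))); // (vscale * 8) % (vscale * 4) == 0
static_assert(!TypeSize::getFixed(8).isKnownMultipleOf(TypeSize::getScalable(4)));   // fixed % scalable is unprovable, so false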
8 changes: 7 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2099,8 +2099,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MVT SubVecContainerVT = SubVecVT;
// Establish the correct scalable-vector types for any fixed-length type.
if (SubVecVT.isFixedLengthVector()) {
assert(Idx == 0 && V.isUndef());
SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
[[maybe_unused]] bool ExactlyVecRegSized =
Subtarget->expandVScale(SubVecVT.getSizeInBits())
.isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
.getKnownMinValue()));
assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
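// E.g. with VLEN known to be exactly 128, a fixed v4i32 subvector occupies
// 128 bits, exactly one vector register, so ExactlyVecRegSized holds and V
// may be defined; a 64-bit v2i32 covers only half a register, so it is only
// inserted along this path when V is undef.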
}
MVT ContainerVT = VT;
if (VT.isFixedLengthVector())
126 changes: 89 additions & 37 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9772,12 +9772,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
}
}

// If the subvector vector is a fixed-length type, we cannot use subregister
// manipulation to simplify the codegen; we don't know which register of a
// LMUL group contains the specific subvector as we only know the minimum
// register size. Therefore we must slide the vector group up the full
// amount.
if (SubVecVT.isFixedLengthVector()) {
// If the subvector is a fixed-length type and we don't know VLEN exactly,
// we cannot use subregister manipulation to simplify the codegen; we don't
// know which register of an LMUL group contains the specific subvector as
// we only know the minimum register size. Therefore we must slide the
// vector group up the full amount.
const auto VLen = Subtarget.getRealVLen();
if (SubVecVT.isFixedLengthVector() && !VLen) {
if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
return Op;
MVT ContainerVT = VecVT;
@@ -9825,41 +9826,90 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
return DAG.getBitcast(Op.getValueType(), SubVec);
}

unsigned SubRegIdx, RemIdx;
std::tie(SubRegIdx, RemIdx) =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
VecVT, SubVecVT, OrigIdx, TRI);
MVT ContainerVecVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVecVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
}

MVT ContainerSubVecVT = SubVecVT;
if (SubVecVT.isFixedLengthVector()) {
ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
}

unsigned SubRegIdx;
ElementCount RemIdx;
// insert_subvector scales the index by vscale if the subvector is scalable,
// and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
// we have a fixed-length subvector, we need to adjust the index by 1/vscale.
if (SubVecVT.isFixedLengthVector()) {
assert(VLen);
unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
auto Decompose =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
SubRegIdx = Decompose.first;
RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
(OrigIdx % Vscale));
} else {
auto Decompose =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
SubRegIdx = Decompose.first;
RemIdx = ElementCount::getScalable(Decompose.second);
}
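// Worked example (illustrative; assumes VLEN is known to be exactly 128, so
// Vscale == 2): inserting a fixed v2i32 at OrigIdx == 2 into a fixed v8i32
// uses containers nxv1i32 and nxv4i32 and decomposes at scalable index
// OrigIdx / Vscale == 1. If that lands at remainder 1 within the group's
// first register, RemIdx becomes 1 * 2 + (2 % 2) == 2 fixed elements, i.e.
// a two-element slide within that LMUL=1 register.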

RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
assert(isPowerOf2_64(
Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
bool ExactlyVecRegSized =
Subtarget.expandVScale(SubVecVT.getSizeInBits())
.isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));

// 1. If the Idx has been completely eliminated and this subvector's size is
// a vector register or a multiple thereof, or the surrounding elements are
// undef, then this is a subvector insert which naturally aligns to a vector
// register. These can easily be handled using subregister manipulation.
// 2. If the subvector is smaller than a vector register, then the insertion
// must preserve the undisturbed elements of the register. We do this by
// lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
// (which resolves to a subregister copy), performing a VSLIDEUP to place the
// subvector within the vector register, and an INSERT_SUBVECTOR of that
// LMUL=1 type back into the larger vector (resolving to another subregister
// operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
// to avoid allocating a large register group to hold our subvector.
if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
// 2. If the subvector isn't an exact multiple of a valid register group size,
// then the insertion must preserve the undisturbed elements of the register.
// We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
// vector type (which resolves to a subregister copy), performing a VSLIDEUP
// to place the subvector within the vector register, and an INSERT_SUBVECTOR
// of that LMUL=1 type back into the larger vector (resolving to another
// subregister operation). See below for how our VSLIDEUP works. We go via a
// LMUL=1 type to avoid allocating a large register group to hold our
// subvector.
if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
if (SubVecVT.isFixedLengthVector()) {
// We may get NoSubRegister if inserting at index 0 and the subvec
// container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
if (SubRegIdx == RISCV::NoSubRegister) {
assert(OrigIdx == 0);
return Op;
}

SDValue Insert =
DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
if (VecVT.isFixedLengthVector())
Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
return Insert;
}
return Op;
}

// VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, elements
// OFFSET <= i < VL set to the "subvector" and VL <= i < VLMAX set to the tail
// policy (in our case undisturbed). This means we can set up a subvector
// insertion where OFFSET is the insertion offset, and VL is OFFSET plus the
// size of the subvector.
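// For example, inserting a 2-element subvector at offset 3 uses OFFSET = 3
// and VL = 3 + 2 = 5: elements 0..2 are left untouched, elements 3..4
// receive the subvector, and the undisturbed tail policy preserves element
// 5 upwards.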
MVT InterSubVT = VecVT;
MVT InterSubVT = ContainerVecVT;
SDValue AlignedExtract = Vec;
unsigned AlignedIdx = OrigIdx - RemIdx;
if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
InterSubVT = getLMUL1VT(VecVT);
unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
if (SubVecVT.isFixedLengthVector())
AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
InterSubVT = getLMUL1VT(ContainerVecVT);
// Extract a subvector equal to the nearest full vector register type. This
// should resolve to a EXTRACT_SUBREG instruction.
AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
@@ -9870,25 +9920,24 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
DAG.getUNDEF(InterSubVT), SubVec,
DAG.getVectorIdxConstant(0, DL));

auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);

ElementCount EndIndex =
ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
VL = computeVLMax(SubVecVT, DL, DAG);
ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());

// Use tail agnostic policy if we're inserting over InterSubVT's tail.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
if (EndIndex == InterSubVT.getVectorElementCount())
if (Subtarget.expandVScale(EndIndex) ==
Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
Policy = RISCVII::TAIL_AGNOSTIC;
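// Continuing the VLEN=128 sketch above: inserting v2i32 with RemIdx == 2
// into an nxv2i32 InterSubVT gives EndIndex == 4, and nxv2i32 also expands
// to 4 elements when vscale == 2, so the slide covers the whole register
// and the tail-agnostic policy is safe.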

// If we're inserting into the lowest elements, use a tail undisturbed
// vmv.v.v.
if (RemIdx == 0) {
if (RemIdx.isZero()) {
SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
SubVec, VL);
} else {
SDValue SlideupAmt =
DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);

// Construct the vector length corresponding to RemIdx + length(SubVecVT).
VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
Expand All @@ -9899,10 +9948,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,

// If required, insert this subvector back into the correct vector register.
// This should resolve to an INSERT_SUBREG instruction.
if (VecVT.bitsGT(InterSubVT))
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
if (ContainerVecVT.bitsGT(InterSubVT))
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
DAG.getVectorIdxConstant(AlignedIdx, DL));

if (VecVT.isFixedLengthVector())
SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);

// We might have bitcast from a mask type: cast back to the original type if
// required.
return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11 changes: 11 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -200,6 +200,17 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
return Min;
}

/// If the ElementCount or TypeSize \p X is scalable and VScale (VLEN) is
/// exactly known, returns \p X converted to a fixed quantity. Otherwise
/// returns \p X unmodified.
template <typename Quantity> Quantity expandVScale(Quantity X) const {
if (auto VLen = getRealVLen(); VLen && X.isScalable()) {
const unsigned VScale = *VLen / RISCV::RVVBitsPerBlock;
X = Quantity::getFixed(X.getKnownMinValue() * VScale);
}
return X;
}
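
// Illustrative example (editorial, not part of the patch): with VLEN known
// to be exactly 128 (RISCV::RVVBitsPerBlock is 64, so vscale == 2),
// expandVScale(TypeSize::getScalable(64)) returns TypeSize::getFixed(128)
// and expandVScale(ElementCount::getScalable(4)) returns
// ElementCount::getFixed(8). Fixed quantities, or any quantity when VLEN is
// not exactly known, are returned unchanged.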

RISCVABI::ABI getTargetABI() const { return TargetABI; }
bool isSoftFPABI() const {
return TargetABI == RISCVABI::ABI_LP64 ||