Merged
12 changes: 12 additions & 0 deletions llvm/include/llvm/Support/TypeSize.h
@@ -181,6 +181,18 @@ template <typename LeafTy, typename ValueTy> class FixedOrScalableQuantity {
return getKnownMinValue() % RHS == 0;
}

/// Returns whether or not this quantity is known to be a multiple of RHS.
constexpr bool isKnownMultipleOf(const FixedOrScalableQuantity &RHS) const {
// x % y == 0 => x % y == 0
// x % y == 0 => (vscale * x) % y == 0
// x % y == 0 => (vscale * x) % (vscale * y) == 0
// but
// x % y == 0 !=> x % (vscale * y) == 0
if (!isScalable() && RHS.isScalable())
return false;
return getKnownMinValue() % RHS.getKnownMinValue() == 0;
}

// Return the minimum value with the assumption that the count is exact.
// Use in places where a scalable count doesn't make sense (e.g. non-vector
// types, or vectors in backends which don't support scalable vectors).
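As a quick editorial illustration (not part of the patch), the new predicate's four cases play out exactly as the comment describes, and can be checked at compile time since the constructors and the predicate are constexpr:

#include "llvm/Support/TypeSize.h"
using namespace llvm;

static_assert(TypeSize::getFixed(8).isKnownMultipleOf(TypeSize::getFixed(4)));       // 8 % 4 == 0
static_assert(TypeSize::getScalable(8).isKnownMultipleOf(TypeSize::getFixed(4)));    // (vscale * 8) % 4 == 0
static_assert(TypeSize::getScalable(8).isKnownMultipleOf(TypeSize::getScalable(4))); // (vscale * 8) % (vscale * 4) == 0
static_assert(!TypeSize::getFixed(8).isKnownMultipleOf(TypeSize::getScalable(4)));   // fixed % scalable is unprovable, so false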
8 changes: 7 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2099,8 +2099,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MVT SubVecContainerVT = SubVecVT;
// Establish the correct scalable-vector types for any fixed-length type.
if (SubVecVT.isFixedLengthVector()) {
assert(Idx == 0 && V.isUndef());
SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
[[maybe_unused]] bool ExactlyVecRegSized =
Subtarget->expandVScale(SubVecVT.getSizeInBits())
.isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
.getKnownMinValue()));
assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
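// E.g. with VLEN known to be exactly 128, a fixed v4i32 subvector occupies
// 128 bits, exactly one vector register, so ExactlyVecRegSized holds and V
// may be defined; a 64-bit v2i32 covers only half a register, so it is only
// inserted along this path when V is undef.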
}
MVT ContainerVT = VT;
if (VT.isFixedLengthVector())
126 changes: 89 additions & 37 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9772,12 +9772,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
}
}

// If the subvector vector is a fixed-length type, we cannot use subregister
// manipulation to simplify the codegen; we don't know which register of a
// LMUL group contains the specific subvector as we only know the minimum
// register size. Therefore we must slide the vector group up the full
// amount.
if (SubVecVT.isFixedLengthVector()) {
// If the subvector is a fixed-length type and we don't know VLEN exactly,
// we cannot use subregister manipulation to simplify the codegen; we don't
// know which register of an LMUL group contains the specific subvector as
// we only know the minimum register size. Therefore we must slide the
// vector group up the full amount.
const auto VLen = Subtarget.getRealVLen();
if (SubVecVT.isFixedLengthVector() && !VLen) {
if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
return Op;
MVT ContainerVT = VecVT;
@@ -9825,41 +9826,90 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
return DAG.getBitcast(Op.getValueType(), SubVec);
}

unsigned SubRegIdx, RemIdx;
std::tie(SubRegIdx, RemIdx) =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
VecVT, SubVecVT, OrigIdx, TRI);
MVT ContainerVecVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVecVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
}

MVT ContainerSubVecVT = SubVecVT;
if (SubVecVT.isFixedLengthVector()) {
ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
}

unsigned SubRegIdx;
ElementCount RemIdx;
// insert_subvector scales the index by vscale if the subvector is scalable,
// and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
// we have a fixed-length subvector, we need to adjust the index by 1/vscale.
if (SubVecVT.isFixedLengthVector()) {
assert(VLen);
unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
auto Decompose =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
SubRegIdx = Decompose.first;
RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
(OrigIdx % Vscale));
} else {
auto Decompose =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
SubRegIdx = Decompose.first;
RemIdx = ElementCount::getScalable(Decompose.second);
}
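// Worked example (illustrative; assumes VLEN is known to be exactly 128, so
// Vscale == 2): inserting a fixed v2i32 at OrigIdx == 2 into a fixed v8i32
// uses containers nxv1i32 and nxv4i32 and decomposes at scalable index
// OrigIdx / Vscale == 1. If that lands at remainder 1 within the group's
// first register, RemIdx becomes 1 * 2 + (2 % 2) == 2 fixed elements, i.e.
// a two-element slide within that LMUL=1 register.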

RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
assert(isPowerOf2_64(
Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
bool ExactlyVecRegSized =
Subtarget.expandVScale(SubVecVT.getSizeInBits())
.isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));

// 1. If the Idx has been completely eliminated and this subvector's size is
// a vector register or a multiple thereof, or the surrounding elements are
// undef, then this is a subvector insert which naturally aligns to a vector
// register. These can easily be handled using subregister manipulation.
// 2. If the subvector is smaller than a vector register, then the insertion
// must preserve the undisturbed elements of the register. We do this by
// lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
// (which resolves to a subregister copy), performing a VSLIDEUP to place the
// subvector within the vector register, and an INSERT_SUBVECTOR of that
// LMUL=1 type back into the larger vector (resolving to another subregister
// operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
// to avoid allocating a large register group to hold our subvector.
if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
// 2. If the subvector isn't an exact multiple of a valid register group size,
// then the insertion must preserve the undisturbed elements of the register.
// We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
// vector type (which resolves to a subregister copy), performing a VSLIDEUP
// to place the subvector within the vector register, and an INSERT_SUBVECTOR
// of that LMUL=1 type back into the larger vector (resolving to another
// subregister operation). See below for how our VSLIDEUP works. We go via a
// LMUL=1 type to avoid allocating a large register group to hold our
// subvector.
if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
if (SubVecVT.isFixedLengthVector()) {
// We may get NoSubRegister if inserting at index 0 and the subvec
// container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
if (SubRegIdx == RISCV::NoSubRegister) {
assert(OrigIdx == 0);
return Op;
}

SDValue Insert =
DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
if (VecVT.isFixedLengthVector())
Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
return Insert;
}
return Op;
}

// VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, elements
// OFFSET <= i < VL set to the "subvector" and VL <= i < VLMAX set to the tail
// policy (in our case undisturbed). This means we can set up a subvector
// insertion where OFFSET is the insertion offset, and VL is OFFSET plus the
// size of the subvector.
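// For example, inserting a 2-element subvector at offset 3 uses OFFSET = 3
// and VL = 3 + 2 = 5: elements 0..2 are left untouched, elements 3..4
// receive the subvector, and the undisturbed tail policy preserves element
// 5 upwards.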
MVT InterSubVT = VecVT;
MVT InterSubVT = ContainerVecVT;
SDValue AlignedExtract = Vec;
unsigned AlignedIdx = OrigIdx - RemIdx;
if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
InterSubVT = getLMUL1VT(VecVT);
unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
if (SubVecVT.isFixedLengthVector())
AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
InterSubVT = getLMUL1VT(ContainerVecVT);
// Extract a subvector equal to the nearest full vector register type. This
// should resolve to a EXTRACT_SUBREG instruction.
AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
@@ -9870,25 +9920,24 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
DAG.getUNDEF(InterSubVT), SubVec,
DAG.getVectorIdxConstant(0, DL));

auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);

ElementCount EndIndex =
ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
VL = computeVLMax(SubVecVT, DL, DAG);
ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());

// Use tail agnostic policy if we're inserting over InterSubVT's tail.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
if (EndIndex == InterSubVT.getVectorElementCount())
if (Subtarget.expandVScale(EndIndex) ==
Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
Policy = RISCVII::TAIL_AGNOSTIC;
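// Continuing the VLEN=128 sketch above: inserting v2i32 with RemIdx == 2
// into an nxv2i32 InterSubVT gives EndIndex == 4, and nxv2i32 also expands
// to 4 elements when vscale == 2, so the slide covers the whole register
// and the tail-agnostic policy is safe.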

// If we're inserting into the lowest elements, use a tail undisturbed
// vmv.v.v.
if (RemIdx == 0) {
if (RemIdx.isZero()) {
SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
SubVec, VL);
} else {
SDValue SlideupAmt =
DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);

// Construct the vector length corresponding to RemIdx + length(SubVecVT).
VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
Expand All @@ -9899,10 +9948,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,

// If required, insert this subvector back into the correct vector register.
// This should resolve to an INSERT_SUBREG instruction.
if (VecVT.bitsGT(InterSubVT))
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
if (ContainerVecVT.bitsGT(InterSubVT))
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
DAG.getVectorIdxConstant(AlignedIdx, DL));

if (VecVT.isFixedLengthVector())
SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);

// We might have bitcast from a mask type: cast back to the original type if
// required.
return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11 changes: 11 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -200,6 +200,17 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
return Min;
}

/// If the ElementCount or TypeSize \p X is scalable and VScale (VLEN) is
/// exactly known, returns \p X converted to a fixed quantity. Otherwise
/// returns \p X unmodified.
template <typename Quantity> Quantity expandVScale(Quantity X) const {
if (auto VLen = getRealVLen(); VLen && X.isScalable()) {
const unsigned VScale = *VLen / RISCV::RVVBitsPerBlock;
X = Quantity::getFixed(X.getKnownMinValue() * VScale);
}
return X;
}
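
// Illustrative example (editorial, not part of the patch): with VLEN known
// to be exactly 128 (RISCV::RVVBitsPerBlock is 64, so vscale == 2),
// expandVScale(TypeSize::getScalable(64)) returns TypeSize::getFixed(128)
// and expandVScale(ElementCount::getScalable(4)) returns
// ElementCount::getFixed(8). Fixed quantities, or any quantity when VLEN is
// not exactly known, are returned unchanged.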

RISCVABI::ABI getTargetABI() const { return TargetABI; }
bool isSoftFPABI() const {
return TargetABI == RISCVABI::ABI_LP64 ||