[RISCV] Implement EmitTargetCodeForMemset for Xqcilsm #151555

svs-quic · 2025-07-31T16:38:06Z

This patch adds support for converting memset calls to one or more QC_SETWMI instructions when beneficial. We only handle aligned memset calls for now.

We limit a QC_SETWMI to 16 words or less to improve interruptibility.
So for 1-16 words we use a single QC_SETWMI:

QC_SETWMI reg1, N, 0(reg2)

For 17-32 words we use two QC_SETWMI instructions, the first covering 16 words and the second covering the remaining N-16 words (N is the total number of words):

QC_SETWMI reg1, 16, 0(reg2)
QC_SETWMI reg1, N-16, 64(reg2)

For 33-48 words, we would like to use (16, 16, n), but that means the last QC_SETWMI needs an offset of 128 which the instruction doesn't support. So in this case we use a length of 15 for the second instruction and we do the rest with the third instruction.

This means the maximum number of words handled is 47 (for now). With N as the total number of words, the sequence is:

QC_SETWMI R2, R0, 16, 0
QC_SETWMI R2, R0, 15, 64
QC_SETWMI R2, R0, N-31, 124

For 48 words or more, call the target independent memset.

llvmbot · 2025-07-31T16:38:36Z

@llvm/pr-subscribers-backend-risc-v

Author: Sudharsan Veeravalli (svs-quic)

Changes

This patch adds support for converting memset calls to one or more QC_SETWMI instructions when beneficial. We only handle aligned memset calls for now.

We limit a QC_SETWMI to 16 words or less to improve interruptibility.
So for 1-16 words we use a single QC_SETWMI:

QC_SETWMI reg1, N, 0(reg2)

For 17-32 words we use two QC_SETWMI instructions, the first covering 16 words and the second covering the remaining N-16 words (N is the total number of words):

QC_SETWMI reg1, 16, 0(reg2)
QC_SETWMI reg1, N-16, 64(reg2)

For 33-48 words, we would like to use (16, 16, n), but that means the last QC_SETWMI needs an offset of 128 which the instruction doesn't support. So in this case we use a length of 15 for the second instruction and we do the rest with the third instruction.

This means the maximum number of words handled is 47 (for now). With N as the total number of words, the sequence is:

QC_SETWMI R2, R0, 16, 0
QC_SETWMI R2, R0, 15, 64
QC_SETWMI R2, R0, N-31, 124

For 48 words or more, call the target independent memset.

Patch is 36.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151555.diff

5 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+9)
(modified) llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td (+8)
(modified) llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp (+101)
(modified) llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h (+6)
(added) llvm/test/CodeGen/RISCV/xqcilsm-memset.ll (+929)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index f223fdbef4359..b778c33083685 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1845,6 +1845,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { CurDAG->RemoveDeadNode(Node); return; } + case RISCVISD::QC_SETWMI: { + SDValue Chain = Node->getOperand(0); + SDVTList VTs = Node->getVTList(); + SDValue Ops[] = {Node->getOperand(1), Node->getOperand(2), + Node->getOperand(3), Node->getOperand(4), Chain}; + MachineSDNode *New = CurDAG->getMachineNode(RISCV::QC_SETWMI, DL, VTs, Ops); + ReplaceNode(Node, New); + return; + } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = Node->getConstantOperandVal(0); switch (IntNo) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 52656134b7774..2479ced164927 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -14,6 +14,14 @@ // Operand and SDNode transformation definitions. 
//===----------------------------------------------------------------------===// +def SDT_StoreMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 3>, + SDTCisPtrTy<2>, + SDTCisVT<3, XLenVT>]>; + +def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_StoreMultiple, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + def uimm5nonzero : RISCVOp<XLenVT>, ImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5, "NonZero">; diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index 6ecddad72c078..edfa2992711a0 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "RISCVSelectionDAGInfo.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/SelectionDAG.h" #define GET_SDNODE_DESC #include "RISCVGenSDNodeInfo.inc" @@ -62,3 +64,102 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, } #endif } + +SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo) const { + const RISCVSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); + // We currently do this only for Xqcilsm + if (!Subtarget.hasVendorXqcilsm()) + return SDValue(); + + // Do this only if we know the size at compile time. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + + uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue(); + + // Do this only if it is word aligned and we write multiple of 4 bytes. 
+ if (!((Alignment.value() & 3) == 0 && (NumberOfBytesToWrite & 3) == 0)) + return SDValue(); + + SmallVector<SDValue, 8> OutChains; + SDValue SizeWords, OffsetSetwmi; + SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + int NumberOfWords = NumberOfBytesToWrite / 4; + + // Helper for constructing the QC_SETWMI instruction + auto getSetwmiNode = [&](SDValue SizeWords, SDValue OffsetSetwmi) -> SDValue { + SDValue Ops[] = {Chain, SrcValueReplicated, Dst, SizeWords, OffsetSetwmi}; + return DAG.getNode(RISCVISD::QC_SETWMI, dl, MVT::Other, Ops); + }; + + bool IsZeroVal = + isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero(); + + // If i8 type and constant non-zero value. + if ((Src.getValueType() == MVT::i8) && !IsZeroVal) + // Replicate byte to word by multiplication with 0x01010101. + SrcValueReplicated = DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated, + DAG.getConstant(16843009, dl, MVT::i32)); + + // We limit a QC_SETWMI to 16 words or less to improve interruptibility. + // So for 1-16 words we use a single QC_SETWMI: + // + // QC_SETWMI reg1, N, 0(reg2) + // + // For 17-32 words we use two QC_SETWMI's with the first as 16 words and the + // second for the remainder: + // + // QC_SETWMI reg1, 16, 0(reg2) + // QC_SETWMI reg1, 32-N, 64(reg2) + // + // For 33-48 words, we would like to use (16, 16, n), but that means the last + // QC_SETWMI needs an offset of 128 which the instruction doesnt support. + // So in this case we use a length of 15 for the second instruction and we do + // the rest with the third instruction. 
+ // This means the maximum inlined number of words is 47 (for now): + // + // QC_SETWMI R2, R0, 16, 0 + // QC_SETWMI R2, R0, 15, 64 + // QC_SETWMI R2, R0, N, 124 + // + // For 48 words or more, call the target independent memset + if (NumberOfWords <= 16) { + // 1 - 16 words + SizeWords = DAG.getTargetConstant(NumberOfWords, dl, MVT::i32); + SDValue OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + return getSetwmiNode(SizeWords, OffsetSetwmi); + } else if (NumberOfWords <= 47) { + if (NumberOfWords <= 32) { + // 17 - 32 words + SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + } else { + // 33 - 47 words + SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(15, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + } + + // >= 48 words. Call target independent memset. 
+ return SDValue(); +} diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h index 641189f8661c1..08c8d11f2b108 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h @@ -34,6 +34,12 @@ class RISCVSelectionDAGInfo : public SelectionDAGGenTargetInfo { void verifyTargetNode(const SelectionDAG &DAG, const SDNode *N) const override; + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo) const override; + bool hasPassthruOp(unsigned Opcode) const { return GenNodeInfo.getDesc(Opcode).TSFlags & RISCVISD::HasPassthruOpMask; } diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll new file mode 100644 index 0000000000000..b0107cc1a4e03 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll @@ -0,0 +1,929 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I + +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilsm < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IXQCISLS + +%struct.anon = type { [16 x i32] } +%struct.anon.0 = type { [47 x i32] } +%struct.anon.1 = type { [48 x i32] } +%struct.anon.2 = type { [64 x i8] } +%struct.struct1_t = type { [16 x i32] } + +@struct1 = common dso_local local_unnamed_addr global %struct.anon zeroinitializer, align 4 +@struct4b = common dso_local local_unnamed_addr global %struct.anon.0 zeroinitializer, align 4 +@struct4b1 = common dso_local local_unnamed_addr global %struct.anon.1 zeroinitializer, align 4 +@struct2 = common dso_local local_unnamed_addr global %struct.anon.2 zeroinitializer, align 1 +@arr1 = common dso_local local_unnamed_addr 
global [100 x i32] zeroinitializer, align 4 +@struct1_ = common dso_local local_unnamed_addr global %struct.struct1_t zeroinitializer, align 4 + +define void @test1(ptr nocapture %p, i32 %n) nounwind { +; RV32I-LABEL: test1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test1: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: mv a2, a1 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 1 %p, i8 0, i32 %n, i1 false) + ret void +} + +declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1) + +define void @test2(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test2: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 678490 +; RV32IXQCISLS-NEXT: addi a1, a1, 1445 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false) + ret void +} + +define void @test2a(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 188 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test2a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 678490 +; RV32IXQCISLS-NEXT: addi a1, a1, 1445 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 15, 64(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 124(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 188, i1 false) + ret void +} + +define void @test2b(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: tail memset +; 
+; RV32IXQCISLS-LABEL: test2b: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: li a1, 165 +; RV32IXQCISLS-NEXT: li a2, 192 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 192, i1 false) + ret void +} + +define void @test2c(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test2c: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 678490 +; RV32IXQCISLS-NEXT: addi a1, a1, 1445 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false) + ret void +} + +define void @test2d(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2d: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, -91 +; RV32I-NEXT: lui a2, 1048570 +; RV32I-NEXT: lui a3, 678490 +; RV32I-NEXT: addi a2, a2, 1445 +; RV32I-NEXT: addi a3, a3, 1445 +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sh a2, 8(a0) +; RV32I-NEXT: sb a1, 10(a0) +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test2d: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: li a1, -91 +; RV32IXQCISLS-NEXT: lui a2, 1048570 +; RV32IXQCISLS-NEXT: lui a3, 678490 +; RV32IXQCISLS-NEXT: addi a2, a2, 1445 +; RV32IXQCISLS-NEXT: addi a3, a3, 1445 +; RV32IXQCISLS-NEXT: sw a3, 0(a0) +; RV32IXQCISLS-NEXT: sw a3, 4(a0) +; RV32IXQCISLS-NEXT: sh a2, 8(a0) +; RV32IXQCISLS-NEXT: sb a1, 10(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 11, i1 false) + ret void +} + + +define ptr @test3(ptr %p) nounwind { +; RV32I-LABEL: test3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a2, 256 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test3: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: li a2, 
256 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 256, i1 false) + ret ptr %p +} + +define ptr @test3a(ptr %p) nounwind { +; RV32I-LABEL: test3a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test3a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 128, i1 false) + ret ptr %p +} + +define void @test4() nounwind { +; RV32I-LABEL: test4: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(struct1) +; RV32I-NEXT: addi a0, a0, %lo(struct1) +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test4: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(struct1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @struct1, i8 0, i32 64, i1 false) + ret void +} + +define void @test4a(ptr nocapture %s) nounwind { +; RV32I-LABEL: test4a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 166 +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test4a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 682602 +; RV32IXQCISLS-NEXT: addi a1, a1, 1702 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %s, i8 -90, i32 64, i1 false) + ret void +} + +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) + +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) + +define void @test4b() nounwind { +; RV32I-LABEL: test4b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded 
Spill +; RV32I-NEXT: lui a0, %hi(struct4b) +; RV32I-NEXT: addi a0, a0, %lo(struct4b) +; RV32I-NEXT: li a2, 188 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call memset +; RV32I-NEXT: lui a0, %hi(struct4b1) +; RV32I-NEXT: addi a0, a0, %lo(struct4b1) +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test4b: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, %hi(struct4b) +; RV32IXQCISLS-NEXT: addi a1, a1, %lo(struct4b) +; RV32IXQCISLS-NEXT: lui a0, %hi(struct4b1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct4b1) +; RV32IXQCISLS-NEXT: li a2, 192 +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 124(a1) +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b, i8 0, i32 188, i1 false) + tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b1, i8 0, i32 192, i1 false) + ret void +} + +define void @test5() nounwind { +; RV32I-LABEL: test5: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(struct2) +; RV32I-NEXT: addi a0, a0, %lo(struct2) +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test5: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(struct2) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct2) +; RV32IXQCISLS-NEXT: li a2, 64 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 1 @struct2, i8 0, i32 64, i1 false) + ret void +} + +define i32 @test6() nounwind { +; RV32I-LABEL: test6: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6: +; RV32IXQCISLS: # %bb.0: # %entry +; 
RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sw zero, 12(sp) +; RV32IXQCISLS-NEXT: li a0, 0 +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false) + %0 = load i32, ptr %x, align 4 + ret i32 %0 +} + +define i32 @test6a() nounwind { +; RV32I-LABEL: test6a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: lw a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sw zero, 12(sp) +; RV32IXQCISLS-NEXT: lw a0, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) + store i32 0, ptr %x, align 4 + %x.0.x.0. = load volatile i32, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) + ret i32 %x.0.x.0. +} + +define zeroext i8 @test6b_c() nounwind { +; RV32I-LABEL: test6b_c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sb zero, 12(sp) +; RV32I-NEXT: lbu a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6b_c: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sb zero, 12(sp) +; RV32IXQCISLS-NEXT: lbu a0, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i8, align 4 + call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %x) + call void @llvm.memset.p0.i32(ptr nonnull align 4 %x, i8 0, i32 1, i1 false) + %x.0.x.0. = load volatile i8, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %x) + ret i8 %x.0.x.0. 
+} + +define signext i16 @test6b_s() nounwind { +; RV32I-LABEL: test6b_s: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sh zero, 12(sp) +; RV32I-NEXT: lh a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6b_s: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sh zero, 12(sp) +; RV32IXQCISLS-NEXT: lh a0, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i16, align 4 + call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %x) + store i16 0, ptr %x, align 4 + %x.0.x.0. = load volatile i16, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %x) + ret i16 %x.0.x.0. +} + +define i32 @test6b_l() nounwind { +; RV32I-LABEL: test6b_l: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: lw a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6b_l: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32I... [truncated]

lenary

Thanks. Just some small nits, and one place where the logic/testing needs an addition.

llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td