- Notifications
You must be signed in to change notification settings - Fork 15.3k
[RISCV] Implement EmitTargetCodeForMemset for Xqcilsm #151555
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
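| @llvm/pr-subscribers-backend-risc-v Author: Sudharsan Veeravalli (svs-quic) Changes: This patch adds support for converting memset calls to one or more `QC_SETWMI` instructions when beneficial. We only handle aligned memset calls for now. We limit a `QC_SETWMI` to 16 words or less to improve interruptibility.
For 1-16 words we use a single `QC_SETWMI`: `QC_SETWMI reg1, N, 0(reg2)`. For 17-32 words we use two `QC_SETWMI`s, with the first as 16 words and the second for the remainder: `QC_SETWMI reg1, 16, 0(reg2)` followed by `QC_SETWMI reg1, N-16, 64(reg2)`. For 33-48 words, we would like to use (16, 16, n), but that means the last `QC_SETWMI` needs an offset of 128, which the instruction doesn't support, so we use a length of 15 for the second instruction and do the rest with the third instruction. This means the maximum number of words handled is 47 (for now): `QC_SETWMI reg1, 16, 0(reg2)`, `QC_SETWMI reg1, 15, 64(reg2)`, `QC_SETWMI reg1, N-31, 124(reg2)`. For 48 words or more, call the target independent memset. Patch is 36.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151555.diff 5 Files Affected: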
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index f223fdbef4359..b778c33083685 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1845,6 +1845,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { CurDAG->RemoveDeadNode(Node); return; } + case RISCVISD::QC_SETWMI: { + SDValue Chain = Node->getOperand(0); + SDVTList VTs = Node->getVTList(); + SDValue Ops[] = {Node->getOperand(1), Node->getOperand(2), + Node->getOperand(3), Node->getOperand(4), Chain}; + MachineSDNode *New = CurDAG->getMachineNode(RISCV::QC_SETWMI, DL, VTs, Ops); + ReplaceNode(Node, New); + return; + } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = Node->getConstantOperandVal(0); switch (IntNo) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 52656134b7774..2479ced164927 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -14,6 +14,14 @@ // Operand and SDNode transformation definitions. 
//===----------------------------------------------------------------------===// +def SDT_StoreMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 3>, + SDTCisPtrTy<2>, + SDTCisVT<3, XLenVT>]>; + +def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_StoreMultiple, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + def uimm5nonzero : RISCVOp<XLenVT>, ImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5, "NonZero">; diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index 6ecddad72c078..edfa2992711a0 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "RISCVSelectionDAGInfo.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/SelectionDAG.h" #define GET_SDNODE_DESC #include "RISCVGenSDNodeInfo.inc" @@ -62,3 +64,102 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, } #endif } + +SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo) const { + const RISCVSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); + // We currently do this only for Xqcilsm + if (!Subtarget.hasVendorXqcilsm()) + return SDValue(); + + // Do this only if we know the size at compile time. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + + uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue(); + + // Do this only if it is word aligned and we write multiple of 4 bytes. 
+ if (!((Alignment.value() & 3) == 0 && (NumberOfBytesToWrite & 3) == 0)) + return SDValue(); + + SmallVector<SDValue, 8> OutChains; + SDValue SizeWords, OffsetSetwmi; + SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + int NumberOfWords = NumberOfBytesToWrite / 4; + + // Helper for constructing the QC_SETWMI instruction + auto getSetwmiNode = [&](SDValue SizeWords, SDValue OffsetSetwmi) -> SDValue { + SDValue Ops[] = {Chain, SrcValueReplicated, Dst, SizeWords, OffsetSetwmi}; + return DAG.getNode(RISCVISD::QC_SETWMI, dl, MVT::Other, Ops); + }; + + bool IsZeroVal = + isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero(); + + // If i8 type and constant non-zero value. + if ((Src.getValueType() == MVT::i8) && !IsZeroVal) + // Replicate byte to word by multiplication with 0x01010101. + SrcValueReplicated = DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated, + DAG.getConstant(16843009, dl, MVT::i32)); + + // We limit a QC_SETWMI to 16 words or less to improve interruptibility. + // So for 1-16 words we use a single QC_SETWMI: + // + // QC_SETWMI reg1, N, 0(reg2) + // + // For 17-32 words we use two QC_SETWMI's with the first as 16 words and the + // second for the remainder: + // + // QC_SETWMI reg1, 16, 0(reg2) + // QC_SETWMI reg1, 32-N, 64(reg2) + // + // For 33-48 words, we would like to use (16, 16, n), but that means the last + // QC_SETWMI needs an offset of 128 which the instruction doesnt support. + // So in this case we use a length of 15 for the second instruction and we do + // the rest with the third instruction. 
+ // This means the maximum inlined number of words is 47 (for now): + // + // QC_SETWMI R2, R0, 16, 0 + // QC_SETWMI R2, R0, 15, 64 + // QC_SETWMI R2, R0, N, 124 + // + // For 48 words or more, call the target independent memset + if (NumberOfWords <= 16) { + // 1 - 16 words + SizeWords = DAG.getTargetConstant(NumberOfWords, dl, MVT::i32); + SDValue OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + return getSetwmiNode(SizeWords, OffsetSetwmi); + } else if (NumberOfWords <= 47) { + if (NumberOfWords <= 32) { + // 17 - 32 words + SizeWords = DAG.getTargetConstant(NumberOfWords - 16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + } else { + // 33 - 47 words + SizeWords = DAG.getTargetConstant(NumberOfWords - 31, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(124, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(15, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(64, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + + SizeWords = DAG.getTargetConstant(16, dl, MVT::i32); + OffsetSetwmi = DAG.getTargetConstant(0, dl, MVT::i32); + OutChains.push_back(getSetwmiNode(SizeWords, OffsetSetwmi)); + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + } + + // >= 48 words. Call target independent memset. 
+ return SDValue(); +} diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h index 641189f8661c1..08c8d11f2b108 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h @@ -34,6 +34,12 @@ class RISCVSelectionDAGInfo : public SelectionDAGGenTargetInfo { void verifyTargetNode(const SelectionDAG &DAG, const SDNode *N) const override; + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo) const override; + bool hasPassthruOp(unsigned Opcode) const { return GenNodeInfo.getDesc(Opcode).TSFlags & RISCVISD::HasPassthruOpMask; } diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll new file mode 100644 index 0000000000000..b0107cc1a4e03 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll @@ -0,0 +1,929 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I + +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilsm < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IXQCISLS + +%struct.anon = type { [16 x i32] } +%struct.anon.0 = type { [47 x i32] } +%struct.anon.1 = type { [48 x i32] } +%struct.anon.2 = type { [64 x i8] } +%struct.struct1_t = type { [16 x i32] } + +@struct1 = common dso_local local_unnamed_addr global %struct.anon zeroinitializer, align 4 +@struct4b = common dso_local local_unnamed_addr global %struct.anon.0 zeroinitializer, align 4 +@struct4b1 = common dso_local local_unnamed_addr global %struct.anon.1 zeroinitializer, align 4 +@struct2 = common dso_local local_unnamed_addr global %struct.anon.2 zeroinitializer, align 1 +@arr1 = common dso_local local_unnamed_addr 
global [100 x i32] zeroinitializer, align 4 +@struct1_ = common dso_local local_unnamed_addr global %struct.struct1_t zeroinitializer, align 4 + +define void @test1(ptr nocapture %p, i32 %n) nounwind { +; RV32I-LABEL: test1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test1: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: mv a2, a1 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 1 %p, i8 0, i32 %n, i1 false) + ret void +} + +declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1) + +define void @test2(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test2: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 678490 +; RV32IXQCISLS-NEXT: addi a1, a1, 1445 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false) + ret void +} + +define void @test2a(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 188 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test2a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 678490 +; RV32IXQCISLS-NEXT: addi a1, a1, 1445 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 15, 64(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 124(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 188, i1 false) + ret void +} + +define void @test2b(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: tail memset +; 
+; RV32IXQCISLS-LABEL: test2b: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: li a1, 165 +; RV32IXQCISLS-NEXT: li a2, 192 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 192, i1 false) + ret void +} + +define void @test2c(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test2c: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 678490 +; RV32IXQCISLS-NEXT: addi a1, a1, 1445 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false) + ret void +} + +define void @test2d(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2d: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, -91 +; RV32I-NEXT: lui a2, 1048570 +; RV32I-NEXT: lui a3, 678490 +; RV32I-NEXT: addi a2, a2, 1445 +; RV32I-NEXT: addi a3, a3, 1445 +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sh a2, 8(a0) +; RV32I-NEXT: sb a1, 10(a0) +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test2d: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: li a1, -91 +; RV32IXQCISLS-NEXT: lui a2, 1048570 +; RV32IXQCISLS-NEXT: lui a3, 678490 +; RV32IXQCISLS-NEXT: addi a2, a2, 1445 +; RV32IXQCISLS-NEXT: addi a3, a3, 1445 +; RV32IXQCISLS-NEXT: sw a3, 0(a0) +; RV32IXQCISLS-NEXT: sw a3, 4(a0) +; RV32IXQCISLS-NEXT: sh a2, 8(a0) +; RV32IXQCISLS-NEXT: sb a1, 10(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 11, i1 false) + ret void +} + + +define ptr @test3(ptr %p) nounwind { +; RV32I-LABEL: test3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a2, 256 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test3: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: li a2, 
256 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 256, i1 false) + ret ptr %p +} + +define ptr @test3a(ptr %p) nounwind { +; RV32I-LABEL: test3a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test3a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 64(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 128, i1 false) + ret ptr %p +} + +define void @test4() nounwind { +; RV32I-LABEL: test4: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(struct1) +; RV32I-NEXT: addi a0, a0, %lo(struct1) +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test4: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(struct1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @struct1, i8 0, i32 64, i1 false) + ret void +} + +define void @test4a(ptr nocapture %s) nounwind { +; RV32I-LABEL: test4a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 166 +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test4a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, 682602 +; RV32IXQCISLS-NEXT: addi a1, a1, 1702 +; RV32IXQCISLS-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCISLS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %s, i8 -90, i32 64, i1 false) + ret void +} + +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) + +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) + +define void @test4b() nounwind { +; RV32I-LABEL: test4b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded 
Spill +; RV32I-NEXT: lui a0, %hi(struct4b) +; RV32I-NEXT: addi a0, a0, %lo(struct4b) +; RV32I-NEXT: li a2, 188 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call memset +; RV32I-NEXT: lui a0, %hi(struct4b1) +; RV32I-NEXT: addi a0, a0, %lo(struct4b1) +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test4b: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a1, %hi(struct4b) +; RV32IXQCISLS-NEXT: addi a1, a1, %lo(struct4b) +; RV32IXQCISLS-NEXT: lui a0, %hi(struct4b1) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct4b1) +; RV32IXQCISLS-NEXT: li a2, 192 +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 0(a1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 15, 64(a1) +; RV32IXQCISLS-NEXT: qc.setwmi zero, 16, 124(a1) +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b, i8 0, i32 188, i1 false) + tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b1, i8 0, i32 192, i1 false) + ret void +} + +define void @test5() nounwind { +; RV32I-LABEL: test5: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(struct2) +; RV32I-NEXT: addi a0, a0, %lo(struct2) +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCISLS-LABEL: test5: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: lui a0, %hi(struct2) +; RV32IXQCISLS-NEXT: addi a0, a0, %lo(struct2) +; RV32IXQCISLS-NEXT: li a2, 64 +; RV32IXQCISLS-NEXT: li a1, 0 +; RV32IXQCISLS-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 1 @struct2, i8 0, i32 64, i1 false) + ret void +} + +define i32 @test6() nounwind { +; RV32I-LABEL: test6: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6: +; RV32IXQCISLS: # %bb.0: # %entry +; 
RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sw zero, 12(sp) +; RV32IXQCISLS-NEXT: li a0, 0 +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false) + %0 = load i32, ptr %x, align 4 + ret i32 %0 +} + +define i32 @test6a() nounwind { +; RV32I-LABEL: test6a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: lw a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6a: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sw zero, 12(sp) +; RV32IXQCISLS-NEXT: lw a0, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) + store i32 0, ptr %x, align 4 + %x.0.x.0. = load volatile i32, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) + ret i32 %x.0.x.0. +} + +define zeroext i8 @test6b_c() nounwind { +; RV32I-LABEL: test6b_c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sb zero, 12(sp) +; RV32I-NEXT: lbu a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6b_c: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sb zero, 12(sp) +; RV32IXQCISLS-NEXT: lbu a0, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i8, align 4 + call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %x) + call void @llvm.memset.p0.i32(ptr nonnull align 4 %x, i8 0, i32 1, i1 false) + %x.0.x.0. = load volatile i8, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %x) + ret i8 %x.0.x.0. 
+} + +define signext i16 @test6b_s() nounwind { +; RV32I-LABEL: test6b_s: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sh zero, 12(sp) +; RV32I-NEXT: lh a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6b_s: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32IXQCISLS-NEXT: sh zero, 12(sp) +; RV32IXQCISLS-NEXT: lh a0, 12(sp) +; RV32IXQCISLS-NEXT: addi sp, sp, 16 +; RV32IXQCISLS-NEXT: ret +entry: + %x = alloca i16, align 4 + call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %x) + store i16 0, ptr %x, align 4 + %x.0.x.0. = load volatile i16, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %x) + ret i16 %x.0.x.0. +} + +define i32 @test6b_l() nounwind { +; RV32I-LABEL: test6b_l: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: lw a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCISLS-LABEL: test6b_l: +; RV32IXQCISLS: # %bb.0: # %entry +; RV32IXQCISLS-NEXT: addi sp, sp, -16 +; RV32I... [truncated] |
lenary left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. Just some small nits, and one place where the logic/testing needs an addition.
| ✅ With the latest revision this PR passed the C/C++ code formatter. |
lenary left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm happy with this, but I think the MachinePointerInfo is not quite being handled correctly. Suggestion inline (which will need reformatting).
lenary left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks!
topperc left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| LLVM Buildbot has detected a new failure on builder 116 (build 16511). Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/16511 Here is the relevant piece of the build log for reference: |
This patch adds support for converting memset calls to one or more
`QC_SETWMI` instructions when beneficial. We only handle aligned memset calls for now. We limit a
`QC_SETWMI` to 16 words or less to improve interruptibility. So for
1-16 words we use a single `QC_SETWMI`: `QC_SETWMI reg1, N, 0(reg2)`. For
17-32 words we use two `QC_SETWMI`s, with the first as 16 words and the second for the remainder: `QC_SETWMI reg1, 16, 0(reg2)` followed by `QC_SETWMI reg1, N-16, 64(reg2)`. For
33-48 words, we would like to use (16, 16, n), but that means the last `QC_SETWMI` needs an offset of 128, which the instruction doesn't support. So in this case we use a length of 15 for the second instruction and we do the rest with the third instruction. This means the maximum number of words handled is
47 (for now): `QC_SETWMI reg1, 16, 0(reg2)`, then `QC_SETWMI reg1, 15, 64(reg2)`, then `QC_SETWMI reg1, N-31, 124(reg2)`. For
48 words or more, call the target independent memset.