- Notifications
You must be signed in to change notification settings - Fork 15.3k
[LoongArch] Optimize for scalar type ctpop when lsx enabled #166286
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
zhaoqi5 wants to merge 1 commit into main from users/zhaoqi5/opt-scalar-ctpop-with-lsx
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline, and old review comments may become outdated.
+112 −114
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Member
| @llvm/pr-subscribers-backend-loongarch Author: ZhaoQi (zhaoqi5) Changes: Full diff: https://github.com/llvm/llvm-project/pull/166286.diff 5 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index fe700e17d341b..9c55ea35b34ce 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -7038,29 +7038,40 @@ static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI, MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); + + unsigned BroadcastOp, CTOp, PickOp; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case LoongArch::PseudoCTPOP_B: + BroadcastOp = LoongArch::VREPLGR2VR_B; + CTOp = LoongArch::VPCNT_B; + PickOp = LoongArch::VPICKVE2GR_B; + break; + case LoongArch::PseudoCTPOP_H: + case LoongArch::PseudoCTPOP_H_LA32: + BroadcastOp = LoongArch::VREPLGR2VR_H; + CTOp = LoongArch::VPCNT_H; + PickOp = LoongArch::VPICKVE2GR_H; + break; + case LoongArch::PseudoCTPOP_W: + case LoongArch::PseudoCTPOP_W_LA32: + BroadcastOp = LoongArch::VREPLGR2VR_W; + CTOp = LoongArch::VPCNT_W; + PickOp = LoongArch::VPICKVE2GR_W; + break; + case LoongArch::PseudoCTPOP_D: + BroadcastOp = LoongArch::VREPLGR2VR_D; + CTOp = LoongArch::VPCNT_D; + PickOp = LoongArch::VPICKVE2GR_D; + break; + } + Register ScratchReg1 = MRI.createVirtualRegister(RC); Register ScratchReg2 = MRI.createVirtualRegister(RC); - Register ScratchReg3 = MRI.createVirtualRegister(RC); - - BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0); - BuildMI(*BB, MI, DL, - TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D - : LoongArch::VINSGR2VR_W), - ScratchReg2) - .addReg(ScratchReg1) - .addReg(Src) - .addImm(0); - BuildMI( - *BB, MI, DL, - TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W), - ScratchReg3) - .addReg(ScratchReg2); - BuildMI(*BB, MI, DL, - TII->get(Subtarget.is64Bit() ? 
LoongArch::VPICKVE2GR_D - : LoongArch::VPICKVE2GR_W), - Dst) - .addReg(ScratchReg3) - .addImm(0); + BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Src); + BuildMI(*BB, MI, DL, TII->get(CTOp), ScratchReg2).addReg(ScratchReg1); + BuildMI(*BB, MI, DL, TII->get(PickOp), Dst).addReg(ScratchReg2).addImm(0); MI.eraseFromParent(); return BB; @@ -7432,7 +7443,12 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( case LoongArch::PseudoXVINSGR2VR_B: case LoongArch::PseudoXVINSGR2VR_H: return emitPseudoXVINSGR2VR(MI, BB, Subtarget); - case LoongArch::PseudoCTPOP: + case LoongArch::PseudoCTPOP_B: + case LoongArch::PseudoCTPOP_H: + case LoongArch::PseudoCTPOP_W: + case LoongArch::PseudoCTPOP_D: + case LoongArch::PseudoCTPOP_H_LA32: + case LoongArch::PseudoCTPOP_W_LA32: return emitPseudoCTPOP(MI, BB, Subtarget); case LoongArch::PseudoVMSKLTZ_B: case LoongArch::PseudoVMSKLTZ_H: diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 6b74a4b5e5f6f..14543b3e1f5a8 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1271,9 +1271,27 @@ def PseudoVBZ_W : VecCond<loongarch_vall_zero, v4i32>; def PseudoVBZ_D : VecCond<loongarch_vall_zero, v2i64>; def PseudoVBZ : VecCond<loongarch_vany_zero, v16i8>; -let usesCustomInserter = 1 in -def PseudoCTPOP : Pseudo<(outs GPR:$rd), (ins GPR:$rj), - [(set GPR:$rd, (ctpop GPR:$rj))]>; +let usesCustomInserter = 1 in { +def PseudoCTPOP_B : Pseudo<(outs GPR:$rd), (ins GPR:$rj), + [(set GPR:$rd, (ctpop (and GPR:$rj, 255)))]>; +def PseudoCTPOP_H : Pseudo<(outs GPR:$rd), (ins GPR:$rj), + [(set GPR:$rd, (ctpop (loongarch_bstrpick GRLenVT:$rj, + (GRLenVT 15), (GRLenVT 0))))]>; +let Predicates = [IsLA32] in { +def PseudoCTPOP_H_LA32 : Pseudo<(outs GPR:$rd), (ins GPR:$rj), + [(set GPR:$rd, (ctpop (and GPR:$rj, 65535)))]>; +def PseudoCTPOP_W_LA32 : Pseudo<(outs GPR:$rd), (ins 
GPR:$rj), + [(set GPR:$rd, (ctpop GPR:$rj))]>; +} // Predicates = [IsLA32] + +let Predicates = [IsLA64] in { +def PseudoCTPOP_W : Pseudo<(outs GPR:$rd), (ins GPR:$rj), + [(set GPR:$rd, (ctpop (loongarch_bstrpick i64:$rj, + (i64 31), (i64 0))))]>; +def PseudoCTPOP_D : Pseudo<(outs GPR:$rd), (ins GPR:$rj), + [(set GPR:$rd, (ctpop GPR:$rj))]>; +} // Predicates = [IsLA64] +} // usesCustomInserter = 1 let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { def PseudoVMSKLTZ_B : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>; diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll index 27be02c50f1c7..62dbeef42547d 100644 --- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll @@ -510,11 +510,9 @@ define i8 @test_ctpop_i8(i8 %a) nounwind { ; ; LA64-LABEL: test_ctpop_i8: ; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: vldi $vr0, 0 -; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vpcnt.d $vr0, $vr0 -; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 +; LA64-NEXT: vreplgr2vr.b $vr0, $a0 +; LA64-NEXT: vpcnt.b $vr0, $vr0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i8 @llvm.ctpop.i8(i8 %a) ret i8 %1 @@ -564,11 +562,9 @@ define i16 @test_ctpop_i16(i16 %a) nounwind { ; ; LA64-LABEL: test_ctpop_i16: ; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vldi $vr0, 0 -; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vpcnt.d $vr0, $vr0 -; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 +; LA64-NEXT: vreplgr2vr.h $vr0, $a0 +; LA64-NEXT: vpcnt.h $vr0, $vr0 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i16 @llvm.ctpop.i16(i16 %a) ret i16 %1 @@ -625,11 +621,9 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; ; LA64-LABEL: test_ctpop_i32: ; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: vldi $vr0, 0 -; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vpcnt.d $vr0, $vr0 -; LA64-NEXT: 
vpickve2gr.d $a0, $vr0, 0 +; LA64-NEXT: vreplgr2vr.w $vr0, $a0 +; LA64-NEXT: vpcnt.w $vr0, $vr0 +; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 @@ -714,8 +708,7 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; ; LA64-LABEL: test_ctpop_i64: ; LA64: # %bb.0: -; LA64-NEXT: vldi $vr0, 0 -; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vreplgr2vr.d $vr0, $a0 ; LA64-NEXT: vpcnt.d $vr0, $vr0 ; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 ; LA64-NEXT: ret diff --git a/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll b/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll index 150a6f16804d8..4bce0f4089d01 100644 --- a/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll +++ b/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll @@ -11,29 +11,23 @@ declare i64 @llvm.ctpop.i64(i64) define i8 @test_ctpop_i8(i8 %a) nounwind { ; LA32R-LABEL: test_ctpop_i8: ; LA32R: # %bb.0: -; LA32R-NEXT: andi $a0, $a0, 255 -; LA32R-NEXT: vldi $vr0, 0 -; LA32R-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; LA32R-NEXT: vpcnt.w $vr0, $vr0 -; LA32R-NEXT: vpickve2gr.w $a0, $vr0, 0 +; LA32R-NEXT: vreplgr2vr.b $vr0, $a0 +; LA32R-NEXT: vpcnt.b $vr0, $vr0 +; LA32R-NEXT: vpickve2gr.b $a0, $vr0, 0 ; LA32R-NEXT: ret ; ; LA32S-LABEL: test_ctpop_i8: ; LA32S: # %bb.0: -; LA32S-NEXT: andi $a0, $a0, 255 -; LA32S-NEXT: vldi $vr0, 0 -; LA32S-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; LA32S-NEXT: vpcnt.w $vr0, $vr0 -; LA32S-NEXT: vpickve2gr.w $a0, $vr0, 0 +; LA32S-NEXT: vreplgr2vr.b $vr0, $a0 +; LA32S-NEXT: vpcnt.b $vr0, $vr0 +; LA32S-NEXT: vpickve2gr.b $a0, $vr0, 0 ; LA32S-NEXT: ret ; ; LA64-LABEL: test_ctpop_i8: ; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: vldi $vr0, 0 -; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vpcnt.d $vr0, $vr0 -; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 +; LA64-NEXT: vreplgr2vr.b $vr0, $a0 +; LA64-NEXT: vpcnt.b $vr0, $vr0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i8 @llvm.ctpop.i8(i8 %a) ret i8 %1 @@ -42,31 +36,23 @@ define i8 
@test_ctpop_i8(i8 %a) nounwind { define i16 @test_ctpop_i16(i16 %a) nounwind { ; LA32R-LABEL: test_ctpop_i16: ; LA32R: # %bb.0: -; LA32R-NEXT: lu12i.w $a1, 15 -; LA32R-NEXT: ori $a1, $a1, 4095 -; LA32R-NEXT: and $a0, $a0, $a1 -; LA32R-NEXT: vldi $vr0, 0 -; LA32R-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; LA32R-NEXT: vpcnt.w $vr0, $vr0 -; LA32R-NEXT: vpickve2gr.w $a0, $vr0, 0 +; LA32R-NEXT: vreplgr2vr.h $vr0, $a0 +; LA32R-NEXT: vpcnt.h $vr0, $vr0 +; LA32R-NEXT: vpickve2gr.h $a0, $vr0, 0 ; LA32R-NEXT: ret ; ; LA32S-LABEL: test_ctpop_i16: ; LA32S: # %bb.0: -; LA32S-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32S-NEXT: vldi $vr0, 0 -; LA32S-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; LA32S-NEXT: vpcnt.w $vr0, $vr0 -; LA32S-NEXT: vpickve2gr.w $a0, $vr0, 0 +; LA32S-NEXT: vreplgr2vr.h $vr0, $a0 +; LA32S-NEXT: vpcnt.h $vr0, $vr0 +; LA32S-NEXT: vpickve2gr.h $a0, $vr0, 0 ; LA32S-NEXT: ret ; ; LA64-LABEL: test_ctpop_i16: ; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: vldi $vr0, 0 -; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vpcnt.d $vr0, $vr0 -; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 +; LA64-NEXT: vreplgr2vr.h $vr0, $a0 +; LA64-NEXT: vpcnt.h $vr0, $vr0 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i16 @llvm.ctpop.i16(i16 %a) ret i16 %1 @@ -75,27 +61,23 @@ define i16 @test_ctpop_i16(i16 %a) nounwind { define i32 @test_ctpop_i32(i32 %a) nounwind { ; LA32R-LABEL: test_ctpop_i32: ; LA32R: # %bb.0: -; LA32R-NEXT: vldi $vr0, 0 -; LA32R-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; LA32R-NEXT: vreplgr2vr.w $vr0, $a0 ; LA32R-NEXT: vpcnt.w $vr0, $vr0 ; LA32R-NEXT: vpickve2gr.w $a0, $vr0, 0 ; LA32R-NEXT: ret ; ; LA32S-LABEL: test_ctpop_i32: ; LA32S: # %bb.0: -; LA32S-NEXT: vldi $vr0, 0 -; LA32S-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; LA32S-NEXT: vreplgr2vr.w $vr0, $a0 ; LA32S-NEXT: vpcnt.w $vr0, $vr0 ; LA32S-NEXT: vpickve2gr.w $a0, $vr0, 0 ; LA32S-NEXT: ret ; ; LA64-LABEL: test_ctpop_i32: ; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: vldi $vr0, 0 -; 
LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vpcnt.d $vr0, $vr0 -; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 +; LA64-NEXT: vreplgr2vr.w $vr0, $a0 +; LA64-NEXT: vpcnt.w $vr0, $vr0 +; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 @@ -104,12 +86,10 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { define i64 @test_ctpop_i64(i64 %a) nounwind { ; LA32R-LABEL: test_ctpop_i64: ; LA32R: # %bb.0: -; LA32R-NEXT: vldi $vr0, 0 -; LA32R-NEXT: vldi $vr1, 0 -; LA32R-NEXT: vinsgr2vr.w $vr1, $a1, 0 -; LA32R-NEXT: vpcnt.w $vr1, $vr1 -; LA32R-NEXT: vpickve2gr.w $a1, $vr1, 0 -; LA32R-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; LA32R-NEXT: vreplgr2vr.w $vr0, $a1 +; LA32R-NEXT: vpcnt.w $vr0, $vr0 +; LA32R-NEXT: vpickve2gr.w $a1, $vr0, 0 +; LA32R-NEXT: vreplgr2vr.w $vr0, $a0 ; LA32R-NEXT: vpcnt.w $vr0, $vr0 ; LA32R-NEXT: vpickve2gr.w $a0, $vr0, 0 ; LA32R-NEXT: add.w $a0, $a0, $a1 @@ -118,12 +98,10 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; ; LA32S-LABEL: test_ctpop_i64: ; LA32S: # %bb.0: -; LA32S-NEXT: vldi $vr0, 0 -; LA32S-NEXT: vldi $vr1, 0 -; LA32S-NEXT: vinsgr2vr.w $vr1, $a1, 0 -; LA32S-NEXT: vpcnt.w $vr1, $vr1 -; LA32S-NEXT: vpickve2gr.w $a1, $vr1, 0 -; LA32S-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; LA32S-NEXT: vreplgr2vr.w $vr0, $a1 +; LA32S-NEXT: vpcnt.w $vr0, $vr0 +; LA32S-NEXT: vpickve2gr.w $a1, $vr0, 0 +; LA32S-NEXT: vreplgr2vr.w $vr0, $a0 ; LA32S-NEXT: vpcnt.w $vr0, $vr0 ; LA32S-NEXT: vpickve2gr.w $a0, $vr0, 0 ; LA32S-NEXT: add.w $a0, $a0, $a1 @@ -132,8 +110,7 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; ; LA64-LABEL: test_ctpop_i64: ; LA64: # %bb.0: -; LA64-NEXT: vldi $vr0, 0 -; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vreplgr2vr.d $vr0, $a0 ; LA64-NEXT: vpcnt.d $vr0, $vr0 ; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 ; LA64-NEXT: ret diff --git a/llvm/test/CodeGen/LoongArch/sextw-removal.ll b/llvm/test/CodeGen/LoongArch/sextw-removal.ll index 0c31ff9eee1f2..683d760721c3d 100644 --- 
a/llvm/test/CodeGen/LoongArch/sextw-removal.ll +++ b/llvm/test/CodeGen/LoongArch/sextw-removal.ll @@ -146,19 +146,17 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; CHECK-NEXT: sra.w $a1, $a0, $a1 +; CHECK-NEXT: sra.w $a0, $a0, $a1 ; CHECK-NEXT: .p2align 4, , 16 ; CHECK-NEXT: .LBB4_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: addi.w $a0, $a1, 0 ; CHECK-NEXT: pcaddu18i $ra, %call36(bar) ; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: bstrpick.d $a1, $a0, 31, 0 -; CHECK-NEXT: vldi $vr0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 -; CHECK-NEXT: vpcnt.d $vr0, $vr0 -; CHECK-NEXT: vpickve2gr.d $a1, $vr0, 0 -; CHECK-NEXT: bnez $a0, .LBB4_1 +; CHECK-NEXT: move $a1, $a0 +; CHECK-NEXT: vreplgr2vr.w $vr0, $a0 +; CHECK-NEXT: vpcnt.w $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0 +; CHECK-NEXT: bnez $a1, .LBB4_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -175,11 +173,9 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; NORMV-NEXT: addi.w $a0, $a1, 0 ; NORMV-NEXT: pcaddu18i $ra, %call36(bar) ; NORMV-NEXT: jirl $ra, $ra, 0 -; NORMV-NEXT: bstrpick.d $a1, $a0, 31, 0 -; NORMV-NEXT: vldi $vr0, 0 -; NORMV-NEXT: vinsgr2vr.d $vr0, $a1, 0 -; NORMV-NEXT: vpcnt.d $vr0, $vr0 -; NORMV-NEXT: vpickve2gr.d $a1, $vr0, 0 +; NORMV-NEXT: vreplgr2vr.w $vr0, $a0 +; NORMV-NEXT: vpcnt.w $vr0, $vr0 +; NORMV-NEXT: vpickve2gr.w $a1, $vr0, 0 ; NORMV-NEXT: bnez $a0, .LBB4_1 ; NORMV-NEXT: # %bb.2: # %bb7 ; NORMV-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload @@ -275,8 +271,7 @@ define void @test7(i32 signext %arg, i32 signext %arg1) nounwind { ; CHECK-NEXT: addi.w $a0, $a0, 0 ; CHECK-NEXT: pcaddu18i $ra, %call36(foo) ; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: vldi $vr0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vreplgr2vr.d 
$vr0, $a0 ; CHECK-NEXT: vpcnt.d $vr0, $vr0 ; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 ; CHECK-NEXT: bnez $a0, .LBB6_1 @@ -296,8 +291,7 @@ define void @test7(i32 signext %arg, i32 signext %arg1) nounwind { ; NORMV-NEXT: addi.w $a0, $a0, 0 ; NORMV-NEXT: pcaddu18i $ra, %call36(foo) ; NORMV-NEXT: jirl $ra, $ra, 0 -; NORMV-NEXT: vldi $vr0, 0 -; NORMV-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; NORMV-NEXT: vreplgr2vr.d $vr0, $a0 ; NORMV-NEXT: vpcnt.d $vr0, $vr0 ; NORMV-NEXT: vpickve2gr.d $a0, $vr0, 0 ; NORMV-NEXT: bnez $a0, .LBB6_1 |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
No description provided.