@@ -58,6 +58,182 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 ze
   ret <4 x i1> %v
 }
 
+define <8 x i1> @vpmerge_vv_v8i1(<8 x i1> %va, <8 x i1> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vv_v8i1:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vid.v v10
+; RV32-NEXT: vmsltu.vx v12, v10, a0
+; RV32-NEXT: vmand.mm v9, v9, v12
+; RV32-NEXT: vmandn.mm v8, v8, v9
+; RV32-NEXT: vmand.mm v9, v0, v9
+; RV32-NEXT: vmor.mm v0, v9, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpmerge_vv_v8i1:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vid.v v12
+; RV64-NEXT: vmsltu.vx v10, v12, a0
+; RV64-NEXT: vmand.mm v9, v9, v10
+; RV64-NEXT: vmandn.mm v8, v8, v9
+; RV64-NEXT: vmand.mm v9, v0, v9
+; RV64-NEXT: vmor.mm v0, v9, v8
+; RV64-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vv_v8i1:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVFHMIN-NEXT: vid.v v10
+; RV32ZVFHMIN-NEXT: vmsltu.vx v12, v10, a0
+; RV32ZVFHMIN-NEXT: vmand.mm v9, v9, v12
+; RV32ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
+; RV32ZVFHMIN-NEXT: vmand.mm v9, v0, v9
+; RV32ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vv_v8i1:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64ZVFHMIN-NEXT: vid.v v12
+; RV64ZVFHMIN-NEXT: vmsltu.vx v10, v12, a0
+; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v10
+; RV64ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
+; RV64ZVFHMIN-NEXT: vmand.mm v9, v0, v9
+; RV64ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+; RV64ZVFHMIN-NEXT: ret
+  %v = call <8 x i1> @llvm.vp.merge.v8i1(<8 x i1> %m, <8 x i1> %va, <8 x i1> %vb, i32 %evl)
+  ret <8 x i1> %v
+}
+
+define <16 x i1> @vpmerge_vv_v16i1(<16 x i1> %va, <16 x i1> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vv_v16i1:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vid.v v12
+; RV32-NEXT: vmsltu.vx v10, v12, a0
+; RV32-NEXT: vmand.mm v9, v9, v10
+; RV32-NEXT: vmandn.mm v8, v8, v9
+; RV32-NEXT: vmand.mm v9, v0, v9
+; RV32-NEXT: vmor.mm v0, v9, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpmerge_vv_v16i1:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vid.v v16
+; RV64-NEXT: vmsltu.vx v10, v16, a0
+; RV64-NEXT: vmand.mm v9, v9, v10
+; RV64-NEXT: vmandn.mm v8, v8, v9
+; RV64-NEXT: vmand.mm v9, v0, v9
+; RV64-NEXT: vmor.mm v0, v9, v8
+; RV64-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vv_v16i1:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32ZVFHMIN-NEXT: vid.v v12
+; RV32ZVFHMIN-NEXT: vmsltu.vx v10, v12, a0
+; RV32ZVFHMIN-NEXT: vmand.mm v9, v9, v10
+; RV32ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
+; RV32ZVFHMIN-NEXT: vmand.mm v9, v0, v9
+; RV32ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vv_v16i1:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64ZVFHMIN-NEXT: vid.v v16
+; RV64ZVFHMIN-NEXT: vmsltu.vx v10, v16, a0
+; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v10
+; RV64ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
+; RV64ZVFHMIN-NEXT: vmand.mm v9, v0, v9
+; RV64ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+; RV64ZVFHMIN-NEXT: ret
+  %v = call <16 x i1> @llvm.vp.merge.v16i1(<16 x i1> %m, <16 x i1> %va, <16 x i1> %vb, i32 %evl)
+  ret <16 x i1> %v
+}
+
+define <32 x i1> @vpmerge_vv_v32i1(<32 x i1> %va, <32 x i1> %vb, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vv_v32i1:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vid.v v16
+; RV32-NEXT: vmsltu.vx v10, v16, a0
+; RV32-NEXT: vmand.mm v9, v9, v10
+; RV32-NEXT: vmandn.mm v8, v8, v9
+; RV32-NEXT: vmand.mm v9, v0, v9
+; RV32-NEXT: vmor.mm v0, v9, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpmerge_vv_v32i1:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; RV64-NEXT: vmerge.vim v12, v10, 1, v0
+; RV64-NEXT: vmv1r.v v0, v8
+; RV64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; RV64-NEXT: vmerge.vim v10, v10, 1, v0
+; RV64-NEXT: vmv1r.v v0, v9
+; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, ma
+; RV64-NEXT: vmerge.vvm v10, v10, v12, v0
+; RV64-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vpmerge_vv_v32i1:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: li a1, 32
+; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32ZVFHMIN-NEXT: vid.v v16
+; RV32ZVFHMIN-NEXT: vmsltu.vx v10, v16, a0
+; RV32ZVFHMIN-NEXT: vmand.mm v9, v9, v10
+; RV32ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
+; RV32ZVFHMIN-NEXT: vmand.mm v9, v0, v9
+; RV32ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vpmerge_vv_v32i1:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; RV64ZVFHMIN-NEXT: vmv.v.i v10, 0
+; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; RV64ZVFHMIN-NEXT: vmerge.vim v12, v10, 1, v0
+; RV64ZVFHMIN-NEXT: vmv1r.v v0, v8
+; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; RV64ZVFHMIN-NEXT: vmerge.vim v10, v10, 1, v0
+; RV64ZVFHMIN-NEXT: vmv1r.v v0, v9
+; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e8, m2, tu, ma
+; RV64ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0
+; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; RV64ZVFHMIN-NEXT: vmsne.vi v0, v10, 0
+; RV64ZVFHMIN-NEXT: ret
+  %v = call <32 x i1> @llvm.vp.merge.v32i1(<32 x i1> %m, <32 x i1> %va, <32 x i1> %vb, i32 %evl)
+  ret <32 x i1> %v
+}
+
+define <64 x i1> @vpmerge_vv_v64i1(<64 x i1> %va, <64 x i1> %vb, <64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma
+; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmsne.vi v0, v12, 0
+; CHECK-NEXT: ret
+  %v = call <64 x i1> @llvm.vp.merge.v64i1(<64 x i1> %m, <64 x i1> %va, <64 x i1> %vb, i32 %evl)
+  ret <64 x i1> %v
+}
+
 declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
 
 define <2 x i8> @vpmerge_vv_v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
@@ -1188,10 +1364,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; CHECK-NEXT: vle64.v v8, (a0)
 ; CHECK-NEXT: li a1, 16
 ; CHECK-NEXT: mv a0, a2
-; CHECK-NEXT: bltu a2, a1, .LBB79_2
+; CHECK-NEXT: bltu a2, a1, .LBB83_2
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB79_2:
+; CHECK-NEXT: .LBB83_2:
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
 ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
 ; CHECK-NEXT: addi a0, a2, -16
@@ -1221,10 +1397,10 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a2, 16
 ; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: bltu a0, a2, .LBB80_2
+; CHECK-NEXT: bltu a0, a2, .LBB84_2
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB80_2:
+; CHECK-NEXT: .LBB84_2:
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
 ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT: addi a1, a0, -16