Skip to content

Commit 1574c17

Browse files
committed
[clang][LoongArch] Introduce LASX and LSX conversion intrinsics
This patch introduces the LASX and LSX conversion intrinsics:
- __m256 __lasx_cast_128_s (__m128)
- __m256d __lasx_cast_128_d (__m128d)
- __m256i __lasx_cast_128 (__m128i)
- __m256 __lasx_concat_128_s (__m128, __m128)
- __m256d __lasx_concat_128_d (__m128d, __m128d)
- __m256i __lasx_concat_128 (__m128i, __m128i)
- __m128 __lasx_extract_128_lo_s (__m256)
- __m128d __lasx_extract_128_lo_d (__m256d)
- __m128i __lasx_extract_128_lo (__m256i)
- __m128 __lasx_extract_128_hi_s (__m256)
- __m128d __lasx_extract_128_hi_d (__m256d)
- __m128i __lasx_extract_128_hi (__m256i)
- __m256 __lasx_insert_128_lo_s (__m256, __m128)
- __m256d __lasx_insert_128_lo_d (__m256d, __m128d)
- __m256i __lasx_insert_128_lo (__m256i, __m128i)
- __m256 __lasx_insert_128_hi_s (__m256, __m128)
- __m256d __lasx_insert_128_hi_d (__m256d, __m128d)
- __m256i __lasx_insert_128_hi (__m256i, __m128i)
1 parent a088c6f commit 1574c17

File tree

4 files changed

+439
-0
lines changed

4 files changed

+439
-0
lines changed

clang/include/clang/Basic/BuiltinsLoongArchLASX.def

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -986,3 +986,22 @@ TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx")
986986
TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx")
987987
TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx")
988988
TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx")
989+
990+
// Conversion builtins between the smaller vector types (V4f / V2d / V16Sc)
// and the larger ones (V8f / V4d / V32Sc): cast, concat, extract lo/hi and
// insert lo/hi. Each operation has a float (_s), a double (_d) and a
// signed-char-vector (no suffix) variant. In the type string the return type
// comes first, followed by the argument types. All entries use the "nc"
// attributes and require the "lasx" target feature.
TARGET_BUILTIN(__builtin_lasx_cast_128_s, "V8fV4f", "nc", "lasx")
991+
TARGET_BUILTIN(__builtin_lasx_cast_128_d, "V4dV2d", "nc", "lasx")
992+
TARGET_BUILTIN(__builtin_lasx_cast_128, "V32ScV16Sc", "nc", "lasx")
993+
TARGET_BUILTIN(__builtin_lasx_concat_128_s, "V8fV4fV4f", "nc", "lasx")
994+
TARGET_BUILTIN(__builtin_lasx_concat_128_d, "V4dV2dV2d", "nc", "lasx")
995+
TARGET_BUILTIN(__builtin_lasx_concat_128, "V32ScV16ScV16Sc", "nc", "lasx")
996+
TARGET_BUILTIN(__builtin_lasx_extract_128_lo_s, "V4fV8f", "nc", "lasx")
997+
TARGET_BUILTIN(__builtin_lasx_extract_128_lo_d, "V2dV4d", "nc", "lasx")
998+
TARGET_BUILTIN(__builtin_lasx_extract_128_lo, "V16ScV32Sc", "nc", "lasx")
999+
TARGET_BUILTIN(__builtin_lasx_extract_128_hi_s, "V4fV8f", "nc", "lasx")
1000+
TARGET_BUILTIN(__builtin_lasx_extract_128_hi_d, "V2dV4d", "nc", "lasx")
1001+
TARGET_BUILTIN(__builtin_lasx_extract_128_hi, "V16ScV32Sc", "nc", "lasx")
1002+
TARGET_BUILTIN(__builtin_lasx_insert_128_lo_s, "V8fV8fV4f", "nc", "lasx")
1003+
TARGET_BUILTIN(__builtin_lasx_insert_128_lo_d, "V4dV4dV2d", "nc", "lasx")
1004+
TARGET_BUILTIN(__builtin_lasx_insert_128_lo, "V32ScV32ScV16Sc", "nc", "lasx")
1005+
TARGET_BUILTIN(__builtin_lasx_insert_128_hi_s, "V8fV8fV4f", "nc", "lasx")
1006+
TARGET_BUILTIN(__builtin_lasx_insert_128_hi_d, "V4dV4dV2d", "nc", "lasx")
1007+
TARGET_BUILTIN(__builtin_lasx_insert_128_hi, "V32ScV32ScV16Sc", "nc", "lasx")

clang/lib/Headers/lasxintrin.h

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#ifndef _LOONGSON_ASXINTRIN_H
1111
#define _LOONGSON_ASXINTRIN_H 1
1212

13+
#include <lsxintrin.h>
14+
1315
#if defined(__loongarch_asx)
1416

1517
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
@@ -3882,5 +3884,113 @@ extern __inline
38823884

38833885
#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
38843886

3887+
/* Wraps __builtin_lasx_cast_128_s: passes _1 as v4f32 and returns the
   builtin's result as __m256.
   NOTE(review): what the result holds beyond the input lanes is not visible
   here -- confirm against the intrinsic documentation. */
extern __inline
3888+
__attribute__((__gnu_inline__, __always_inline__,
3889+
__artificial__)) __m256 __lasx_cast_128_s(__m128 _1) {
3890+
return (__m256)__builtin_lasx_cast_128_s((v4f32)_1);
3891+
}
3892+
3893+
/* Wraps __builtin_lasx_cast_128_d: passes _1 as v2f64 and returns the
   builtin's result as __m256d.
   NOTE(review): what the result holds beyond the input lanes is not visible
   here -- confirm against the intrinsic documentation. */
extern __inline
3894+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3895+
__lasx_cast_128_d(__m128d _1) {
3896+
return (__m256d)__builtin_lasx_cast_128_d((v2f64)_1);
3897+
}
3898+
3899+
/* Wraps __builtin_lasx_cast_128: passes _1 as v16i8 and returns the
   builtin's result as __m256i.
   NOTE(review): what the result holds beyond the input lanes is not visible
   here -- confirm against the intrinsic documentation. */
extern __inline
3900+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3901+
__lasx_cast_128(__m128i _1) {
3902+
return (__m256i)__builtin_lasx_cast_128((v16i8)_1);
3903+
}
3904+
3905+
/* Wraps __builtin_lasx_concat_128_s: passes _1 and _2 as v4f32 and returns
   the builtin's result as __m256.
   NOTE(review): which argument ends up in which half of the result is not
   visible here -- confirm against the intrinsic documentation. */
extern __inline
3906+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
3907+
__lasx_concat_128_s(__m128 _1, __m128 _2) {
3908+
return (__m256)__builtin_lasx_concat_128_s((v4f32)_1, (v4f32)_2);
3909+
}
3910+
3911+
/* Wraps __builtin_lasx_concat_128_d: passes _1 and _2 as v2f64 and returns
   the builtin's result as __m256d.
   NOTE(review): which argument ends up in which half of the result is not
   visible here -- confirm against the intrinsic documentation. */
extern __inline
3912+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3913+
__lasx_concat_128_d(__m128d _1, __m128d _2) {
3914+
return (__m256d)__builtin_lasx_concat_128_d((v2f64)_1, (v2f64)_2);
3915+
}
3916+
3917+
/* Wraps __builtin_lasx_concat_128: passes _1 and _2 as v16i8 and returns
   the builtin's result as __m256i.
   NOTE(review): which argument ends up in which half of the result is not
   visible here -- confirm against the intrinsic documentation. */
extern __inline
3918+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3919+
__lasx_concat_128(__m128i _1, __m128i _2) {
3920+
return (__m256i)__builtin_lasx_concat_128((v16i8)_1, (v16i8)_2);
3921+
}
3922+
3923+
/* Wraps __builtin_lasx_extract_128_lo_s: passes _1 as v8f32 and returns the
   builtin's result as __m128.
   NOTE(review): "lo" presumably selects the low half of _1 -- inferred from
   the name only; confirm. */
extern __inline
3924+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
3925+
__lasx_extract_128_lo_s(__m256 _1) {
3926+
return (__m128)__builtin_lasx_extract_128_lo_s((v8f32)_1);
3927+
}
3928+
3929+
/* Wraps __builtin_lasx_extract_128_lo_d: passes _1 as v4f64 and returns the
   builtin's result as __m128d.
   NOTE(review): "lo" presumably selects the low half of _1 -- inferred from
   the name only; confirm. */
extern __inline
3930+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
3931+
__lasx_extract_128_lo_d(__m256d _1) {
3932+
return (__m128d)__builtin_lasx_extract_128_lo_d((v4f64)_1);
3933+
}
3934+
3935+
/* Wraps __builtin_lasx_extract_128_lo: passes _1 as v32i8 and returns the
   builtin's result as __m128i.
   NOTE(review): "lo" presumably selects the low half of _1 -- inferred from
   the name only; confirm. */
extern __inline
3936+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
3937+
__lasx_extract_128_lo(__m256i _1) {
3938+
return (__m128i)__builtin_lasx_extract_128_lo((v32i8)_1);
3939+
}
3940+
3941+
/* Wraps __builtin_lasx_extract_128_hi_s: passes _1 as v8f32 and returns the
   builtin's result as __m128.
   NOTE(review): "hi" presumably selects the high half of _1 -- inferred from
   the name only; confirm. */
extern __inline
3942+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
3943+
__lasx_extract_128_hi_s(__m256 _1) {
3944+
return (__m128)__builtin_lasx_extract_128_hi_s((v8f32)_1);
3945+
}
3946+
3947+
/* Wraps __builtin_lasx_extract_128_hi_d: passes _1 as v4f64 and returns the
   builtin's result as __m128d.
   NOTE(review): "hi" presumably selects the high half of _1 -- inferred from
   the name only; confirm. */
extern __inline
3948+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
3949+
__lasx_extract_128_hi_d(__m256d _1) {
3950+
return (__m128d)__builtin_lasx_extract_128_hi_d((v4f64)_1);
3951+
}
3952+
3953+
/* Wraps __builtin_lasx_extract_128_hi: passes _1 as v32i8 and returns the
   builtin's result as __m128i.
   NOTE(review): "hi" presumably selects the high half of _1 -- inferred from
   the name only; confirm. */
extern __inline
3954+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
3955+
__lasx_extract_128_hi(__m256i _1) {
3956+
return (__m128i)__builtin_lasx_extract_128_hi((v32i8)_1);
3957+
}
3958+
3959+
/* Wraps __builtin_lasx_insert_128_lo_s: passes _1 as v8f32 and _2 as v4f32
   and returns the builtin's result as __m256.
   NOTE(review): presumably yields _1 with its low half replaced by _2 --
   inferred from the name only; confirm. */
extern __inline
3960+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
3961+
__lasx_insert_128_lo_s(__m256 _1, __m128 _2) {
3962+
return (__m256)__builtin_lasx_insert_128_lo_s((v8f32)_1, (v4f32)_2);
3963+
}
3964+
3965+
/* Wraps __builtin_lasx_insert_128_lo_d: passes _1 as v4f64 and _2 as v2f64
   and returns the builtin's result as __m256d.
   NOTE(review): presumably yields _1 with its low half replaced by _2 --
   inferred from the name only; confirm. */
extern __inline
3966+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3967+
__lasx_insert_128_lo_d(__m256d _1, __m128d _2) {
3968+
return (__m256d)__builtin_lasx_insert_128_lo_d((v4f64)_1, (v2f64)_2);
3969+
}
3970+
3971+
/* Wraps __builtin_lasx_insert_128_lo: passes _1 as v32i8 and _2 as v16i8
   and returns the builtin's result as __m256i.
   NOTE(review): presumably yields _1 with its low half replaced by _2 --
   inferred from the name only; confirm. */
extern __inline
3972+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3973+
__lasx_insert_128_lo(__m256i _1, __m128i _2) {
3974+
return (__m256i)__builtin_lasx_insert_128_lo((v32i8)_1, (v16i8)_2);
3975+
}
3976+
3977+
/* Wraps __builtin_lasx_insert_128_hi_s: passes _1 as v8f32 and _2 as v4f32
   and returns the builtin's result as __m256.
   NOTE(review): presumably yields _1 with its high half replaced by _2 --
   inferred from the name only; confirm. */
extern __inline
3978+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
3979+
__lasx_insert_128_hi_s(__m256 _1, __m128 _2) {
3980+
return (__m256)__builtin_lasx_insert_128_hi_s((v8f32)_1, (v4f32)_2);
3981+
}
3982+
3983+
/* Wraps __builtin_lasx_insert_128_hi_d: passes _1 as v4f64 and _2 as v2f64
   and returns the builtin's result as __m256d.
   NOTE(review): presumably yields _1 with its high half replaced by _2 --
   inferred from the name only; confirm. */
extern __inline
3984+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3985+
__lasx_insert_128_hi_d(__m256d _1, __m128d _2) {
3986+
return (__m256d)__builtin_lasx_insert_128_hi_d((v4f64)_1, (v2f64)_2);
3987+
}
3988+
3989+
/* Wraps __builtin_lasx_insert_128_hi: passes _1 as v32i8 and _2 as v16i8
   and returns the builtin's result as __m256i.
   NOTE(review): presumably yields _1 with its high half replaced by _2 --
   inferred from the name only; confirm. */
extern __inline
3990+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3991+
__lasx_insert_128_hi(__m256i _1, __m128i _2) {
3992+
return (__m256i)__builtin_lasx_insert_128_hi((v32i8)_1, (v16i8)_2);
3993+
}
3994+
38853995
#endif /* defined(__loongarch_asx). */
38863996
#endif /* _LOONGSON_ASXINTRIN_H. */

clang/test/CodeGen/LoongArch/lasx/builtin-alias.c

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6384,3 +6384,156 @@ v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); }
63846384
// CHECK-NEXT: ret void
63856385
//
63866386
v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); }
6387+
// CHECK-LABEL: @cast_128_s(
6388+
// CHECK-NEXT: entry:
6389+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6390+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> [[TMP0]])
6391+
// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6392+
// CHECK-NEXT: ret void
6393+
//
6394+
// Test driver for __lasx_cast_128_s; the FileCheck assertions above expect a
// call to @llvm.loongarch.lasx.cast.128.s.
v8f32 cast_128_s(v4f32 _1) { return __lasx_cast_128_s(_1); }
6395+
// CHECK-LABEL: @cast_128_d(
6396+
// CHECK-NEXT: entry:
6397+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6398+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> [[TMP0]])
6399+
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6400+
// CHECK-NEXT: ret void
6401+
//
6402+
// Test driver for __lasx_cast_128_d; the FileCheck assertions above expect a
// call to @llvm.loongarch.lasx.cast.128.d.
v4f64 cast_128_d(v2f64 _1) { return __lasx_cast_128_d(_1); }
6403+
// CHECK-LABEL: @cast_128(
6404+
// CHECK-NEXT: entry:
6405+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6406+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.cast.128(<16 x i8> [[TMP0]])
6407+
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6408+
// CHECK-NEXT: ret void
6409+
//
6410+
// Test driver for __lasx_cast_128; the FileCheck assertions above expect a
// call to @llvm.loongarch.lasx.cast.128.
v32i8 cast_128(v16i8 _1) { return __lasx_cast_128(_1); }
6411+
// CHECK-LABEL: @concat_128_s(
6412+
// CHECK-NEXT: entry:
6413+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6414+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6415+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6416+
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6417+
// CHECK-NEXT: ret void
6418+
//
6419+
// Test driver for __lasx_concat_128_s; the FileCheck assertions above expect
// a call to @llvm.loongarch.lasx.concat.128.s.
v8f32 concat_128_s(v4f32 _1, v4f32 _2) { return __lasx_concat_128_s(_1, _2); }
6420+
// CHECK-LABEL: @concat_128_d(
6421+
// CHECK-NEXT: entry:
6422+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6423+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6424+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6425+
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6426+
// CHECK-NEXT: ret void
6427+
//
6428+
// Test driver for __lasx_concat_128_d; the FileCheck assertions above expect
// a call to @llvm.loongarch.lasx.concat.128.d.
v4f64 concat_128_d(v2f64 _1, v2f64 _2) { return __lasx_concat_128_d(_1, _2); }
6429+
// CHECK-LABEL: @concat_128(
6430+
// CHECK-NEXT: entry:
6431+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6432+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6433+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.concat.128(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
6434+
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6435+
// CHECK-NEXT: ret void
6436+
//
6437+
// Test driver for __lasx_concat_128; the FileCheck assertions above expect a
// call to @llvm.loongarch.lasx.concat.128.
v32i8 concat_128(v16i8 _1, v16i8 _2) { return __lasx_concat_128(_1, _2); }
6438+
// CHECK-LABEL: @extract_128_lo_s(
6439+
// CHECK-NEXT: entry:
6440+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6441+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> [[_1]])
6442+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
6443+
// CHECK-NEXT: ret i128 [[TMP2]]
6444+
//
6445+
// Test driver for __lasx_extract_128_lo_s; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.extract.128.lo.s.
v4f32 extract_128_lo_s(v8f32 _1) { return __lasx_extract_128_lo_s(_1); }
6446+
// CHECK-LABEL: @extract_128_lo_d(
6447+
// CHECK-NEXT: entry:
6448+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6449+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> [[_1]])
6450+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
6451+
// CHECK-NEXT: ret i128 [[TMP2]]
6452+
//
6453+
// Test driver for __lasx_extract_128_lo_d; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.extract.128.lo.d.
v2f64 extract_128_lo_d(v4f64 _1) { return __lasx_extract_128_lo_d(_1); }
6454+
// CHECK-LABEL: @extract_128_lo(
6455+
// CHECK-NEXT: entry:
6456+
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6457+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lasx.extract.128.lo(<32 x i8> [[_112]])
6458+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
6459+
// CHECK-NEXT: ret i128 [[TMP2]]
6460+
//
6461+
// Test driver for __lasx_extract_128_lo; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.extract.128.lo.
v16i8 extract_128_lo(v32i8 _1) { return __lasx_extract_128_lo(_1); }
6462+
// CHECK-LABEL: @extract_128_hi_s(
6463+
// CHECK-NEXT: entry:
6464+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6465+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> [[_1]])
6466+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
6467+
// CHECK-NEXT: ret i128 [[TMP2]]
6468+
//
6469+
// Test driver for __lasx_extract_128_hi_s; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.extract.128.hi.s.
v4f32 extract_128_hi_s(v8f32 _1) { return __lasx_extract_128_hi_s(_1); }
6470+
// CHECK-LABEL: @extract_128_hi_d(
6471+
// CHECK-NEXT: entry:
6472+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6473+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> [[_1]])
6474+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
6475+
// CHECK-NEXT: ret i128 [[TMP2]]
6476+
//
6477+
// Test driver for __lasx_extract_128_hi_d; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.extract.128.hi.d.
v2f64 extract_128_hi_d(v4f64 _1) { return __lasx_extract_128_hi_d(_1); }
6478+
// CHECK-LABEL: @extract_128_hi(
6479+
// CHECK-NEXT: entry:
6480+
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6481+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lasx.extract.128.hi(<32 x i8> [[_112]])
6482+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
6483+
// CHECK-NEXT: ret i128 [[TMP2]]
6484+
//
6485+
// Test driver for __lasx_extract_128_hi; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.extract.128.hi.
v16i8 extract_128_hi(v32i8 _1) { return __lasx_extract_128_hi(_1); }
6486+
// CHECK-LABEL: @insert_128_lo_s(
6487+
// CHECK-NEXT: entry:
6488+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6489+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6490+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
6491+
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6492+
// CHECK-NEXT: ret void
6493+
//
6494+
// Test driver for __lasx_insert_128_lo_s; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.insert.128.lo.s.
v8f32 insert_128_lo_s(v8f32 _1, v4f32 _2) { return __lasx_insert_128_lo_s(_1, _2); }
6495+
// CHECK-LABEL: @insert_128_lo_d(
6496+
// CHECK-NEXT: entry:
6497+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6498+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6499+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
6500+
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6501+
// CHECK-NEXT: ret void
6502+
//
6503+
// Test driver for __lasx_insert_128_lo_d; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.insert.128.lo.d.
v4f64 insert_128_lo_d(v4f64 _1, v2f64 _2) { return __lasx_insert_128_lo_d(_1, _2); }
6504+
// CHECK-LABEL: @insert_128_lo(
6505+
// CHECK-NEXT: entry:
6506+
// CHECK-NEXT: [[_123:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6507+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6508+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.insert.128.lo(<32 x i8> [[_123]], <16 x i8> [[TMP1]])
6509+
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6510+
// CHECK-NEXT: ret void
6511+
//
6512+
// Test driver for __lasx_insert_128_lo; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.insert.128.lo.
v32i8 insert_128_lo(v32i8 _1, v16i8 _2) { return __lasx_insert_128_lo(_1, _2); }
6513+
// CHECK-LABEL: @insert_128_hi_s(
6514+
// CHECK-NEXT: entry:
6515+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6516+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6517+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
6518+
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6519+
// CHECK-NEXT: ret void
6520+
//
6521+
// Test driver for __lasx_insert_128_hi_s; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.insert.128.hi.s.
v8f32 insert_128_hi_s(v8f32 _1, v4f32 _2) { return __lasx_insert_128_hi_s(_1, _2); }
6522+
// CHECK-LABEL: @insert_128_hi_d(
6523+
// CHECK-NEXT: entry:
6524+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6525+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6526+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
6527+
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6528+
// CHECK-NEXT: ret void
6529+
//
6530+
// Test driver for __lasx_insert_128_hi_d; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.insert.128.hi.d.
v4f64 insert_128_hi_d(v4f64 _1, v2f64 _2) { return __lasx_insert_128_hi_d(_1, _2); }
6531+
// CHECK-LABEL: @insert_128_hi(
6532+
// CHECK-NEXT: entry:
6533+
// CHECK-NEXT: [[_123:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6534+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6535+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.insert.128.hi(<32 x i8> [[_123]], <16 x i8> [[TMP1]])
6536+
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6537+
// CHECK-NEXT: ret void
6538+
//
6539+
// Test driver for __lasx_insert_128_hi; the FileCheck assertions above
// expect a call to @llvm.loongarch.lasx.insert.128.hi.
v32i8 insert_128_hi(v32i8 _1, v16i8 _2) { return __lasx_insert_128_hi(_1, _2); }

0 commit comments

Comments
 (0)