Skip to content

Commit fc722d5

Browse files
committed
[clang][LoongArch] Introduce LASX and LSX conversion intrinsics
This patch introduces the LASX and LSX conversion intrinsics: - __m256 __lasx_cast_128_s (__m128) - __m256d __lasx_cast_128_d (__m128d) - __m256i __lasx_cast_128 (__m128i) - __m256 __lasx_concat_128_s (__m128, __m128) - __m256d __lasx_concat_128_d (__m128d, __m128d) - __m256i __lasx_concat_128 (__m128i, __m128i) - __m128 __lasx_extract_128_lo_s (__m256) - __m128d __lasx_extract_128_lo_d (__m256d) - __m128i __lasx_extract_128_lo (__m256i) - __m128 __lasx_extract_128_hi_s (__m256) - __m128d __lasx_extract_128_hi_d (__m256d) - __m128i __lasx_extract_128_hi (__m256i) - __m256 __lasx_insert_128_lo_s (__m256, __m128) - __m256d __lasx_insert_128_lo_d (__m256d, __m128d) - __m256i __lasx_insert_128_lo (__m256i, __m128i) - __m256 __lasx_insert_128_hi_s (__m256, __m128) - __m256d __lasx_insert_128_hi_d (__m256d, __m128d) - __m256i __lasx_insert_128_hi (__m256i, __m128i)
1 parent aceb318 commit fc722d5

File tree

6 files changed

+446
-0
lines changed

6 files changed

+446
-0
lines changed

clang/include/clang/Basic/BuiltinsLoongArchLASX.def

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -986,3 +986,22 @@ TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx")
986986
TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx")
987987
TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx")
988988
TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx")
989+
990+
// Conversion builtins between the smaller and larger vector types: cast,
// concat, extract (lo/hi) and insert (lo/hi), each in a float (_s), double
// (_d) and 64-bit-integer form. In the type string the result type comes
// first, followed by the operand types; e.g. "V8fV4f" is an 8 x float result
// produced from a 4 x float operand. Every entry names the "lasx" feature.
TARGET_BUILTIN(__builtin_lasx_cast_128_s, "V8fV4f", "nc", "lasx")
991+
TARGET_BUILTIN(__builtin_lasx_cast_128_d, "V4dV2d", "nc", "lasx")
992+
TARGET_BUILTIN(__builtin_lasx_cast_128, "V4LLiV2LLi", "nc", "lasx")
993+
TARGET_BUILTIN(__builtin_lasx_concat_128_s, "V8fV4fV4f", "nc", "lasx")
994+
TARGET_BUILTIN(__builtin_lasx_concat_128_d, "V4dV2dV2d", "nc", "lasx")
995+
TARGET_BUILTIN(__builtin_lasx_concat_128, "V4LLiV2LLiV2LLi", "nc", "lasx")
996+
TARGET_BUILTIN(__builtin_lasx_extract_128_lo_s, "V4fV8f", "nc", "lasx")
997+
TARGET_BUILTIN(__builtin_lasx_extract_128_lo_d, "V2dV4d", "nc", "lasx")
998+
TARGET_BUILTIN(__builtin_lasx_extract_128_lo, "V2LLiV4LLi", "nc", "lasx")
999+
TARGET_BUILTIN(__builtin_lasx_extract_128_hi_s, "V4fV8f", "nc", "lasx")
1000+
TARGET_BUILTIN(__builtin_lasx_extract_128_hi_d, "V2dV4d", "nc", "lasx")
1001+
TARGET_BUILTIN(__builtin_lasx_extract_128_hi, "V2LLiV4LLi", "nc", "lasx")
1002+
TARGET_BUILTIN(__builtin_lasx_insert_128_lo_s, "V8fV8fV4f", "nc", "lasx")
1003+
TARGET_BUILTIN(__builtin_lasx_insert_128_lo_d, "V4dV4dV2d", "nc", "lasx")
1004+
TARGET_BUILTIN(__builtin_lasx_insert_128_lo, "V4LLiV4LLiV2LLi", "nc", "lasx")
1005+
TARGET_BUILTIN(__builtin_lasx_insert_128_hi_s, "V8fV8fV4f", "nc", "lasx")
1006+
TARGET_BUILTIN(__builtin_lasx_insert_128_hi_d, "V4dV4dV2d", "nc", "lasx")
1007+
TARGET_BUILTIN(__builtin_lasx_insert_128_hi, "V4LLiV4LLiV2LLi", "nc", "lasx")

clang/lib/Basic/Targets/LoongArch.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
242242
Builder.defineMacro("__loongarch_simd_width", "256");
243243
Builder.defineMacro("__loongarch_sx", Twine(1));
244244
Builder.defineMacro("__loongarch_asx", Twine(1));
245+
Builder.defineMacro("__loongarch_asx_sx_conv", Twine(1));
245246
} else if (HasFeatureLSX) {
246247
Builder.defineMacro("__loongarch_simd_width", "128");
247248
Builder.defineMacro("__loongarch_sx", Twine(1));

clang/lib/Headers/lasxintrin.h

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#ifndef _LOONGSON_ASXINTRIN_H
1111
#define _LOONGSON_ASXINTRIN_H 1
1212

13+
#include <lsxintrin.h>
14+
1315
#if defined(__loongarch_asx)
1416

1517
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
@@ -3882,5 +3884,116 @@ extern __inline
38823884

38833885
/* Expands to __builtin_lasx_xvrepli_w applied to the argument (annotated
   si10), with the result cast to __m256i. */
#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
38843886

3887+
#if defined(__loongarch_asx_sx_conv)
3888+
3889+
/* Passes _1, viewed as v4f32, to __builtin_lasx_cast_128_s and returns the
   builtin's result as __m256.
   NOTE(review): presumably widens an LSX vector to a LASX vector; what the
   added upper lanes hold is not shown here -- confirm. */
extern __inline
3890+
__attribute__((__gnu_inline__, __always_inline__,
3891+
__artificial__)) __m256 __lasx_cast_128_s(__m128 _1) {
3892+
return (__m256)__builtin_lasx_cast_128_s((v4f32)_1);
3893+
}
3894+
3895+
/* Passes _1, viewed as v2f64, to __builtin_lasx_cast_128_d and returns the
   builtin's result as __m256d.
   NOTE(review): presumably widens an LSX vector to a LASX vector; what the
   added upper lanes hold is not shown here -- confirm. */
extern __inline
3896+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3897+
__lasx_cast_128_d(__m128d _1) {
3898+
return (__m256d)__builtin_lasx_cast_128_d((v2f64)_1);
3899+
}
3900+
3901+
/* Passes _1, viewed as v2i64, to __builtin_lasx_cast_128 and returns the
   builtin's result as __m256i.
   NOTE(review): presumably widens an LSX vector to a LASX vector; what the
   added upper lanes hold is not shown here -- confirm. */
extern __inline
3902+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3903+
__lasx_cast_128(__m128i _1) {
3904+
return (__m256i)__builtin_lasx_cast_128((v2i64)_1);
3905+
}
3906+
3907+
/* Passes _1 and _2, both viewed as v4f32, to __builtin_lasx_concat_128_s and
   returns the builtin's result as __m256.
   NOTE(review): presumably _1 becomes the low half and _2 the high half of
   the result -- confirm the operand order. */
extern __inline
3908+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
3909+
__lasx_concat_128_s(__m128 _1, __m128 _2) {
3910+
return (__m256)__builtin_lasx_concat_128_s((v4f32)_1, (v4f32)_2);
3911+
}
3912+
3913+
/* Passes _1 and _2, both viewed as v2f64, to __builtin_lasx_concat_128_d and
   returns the builtin's result as __m256d.
   NOTE(review): presumably _1 becomes the low half and _2 the high half of
   the result -- confirm the operand order. */
extern __inline
3914+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3915+
__lasx_concat_128_d(__m128d _1, __m128d _2) {
3916+
return (__m256d)__builtin_lasx_concat_128_d((v2f64)_1, (v2f64)_2);
3917+
}
3918+
3919+
/* Passes _1 and _2, both viewed as v2i64, to __builtin_lasx_concat_128 and
   returns the builtin's result as __m256i.
   NOTE(review): presumably _1 becomes the low half and _2 the high half of
   the result -- confirm the operand order. */
extern __inline
3920+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3921+
__lasx_concat_128(__m128i _1, __m128i _2) {
3922+
return (__m256i)__builtin_lasx_concat_128((v2i64)_1, (v2i64)_2);
3923+
}
3924+
3925+
/* Passes _1, viewed as v8f32, to __builtin_lasx_extract_128_lo_s and returns
   the builtin's result as __m128.
   NOTE(review): presumably yields the low half of _1 -- confirm. */
extern __inline
3926+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
3927+
__lasx_extract_128_lo_s(__m256 _1) {
3928+
return (__m128)__builtin_lasx_extract_128_lo_s((v8f32)_1);
3929+
}
3930+
3931+
/* Passes _1, viewed as v4f64, to __builtin_lasx_extract_128_lo_d and returns
   the builtin's result as __m128d.
   NOTE(review): presumably yields the low half of _1 -- confirm. */
extern __inline
3932+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
3933+
__lasx_extract_128_lo_d(__m256d _1) {
3934+
return (__m128d)__builtin_lasx_extract_128_lo_d((v4f64)_1);
3935+
}
3936+
3937+
/* Passes _1, viewed as v4i64, to __builtin_lasx_extract_128_lo and returns
   the builtin's result as __m128i.
   NOTE(review): presumably yields the low half of _1 -- confirm. */
extern __inline
3938+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
3939+
__lasx_extract_128_lo(__m256i _1) {
3940+
return (__m128i)__builtin_lasx_extract_128_lo((v4i64)_1);
3941+
}
3942+
3943+
/* Passes _1, viewed as v8f32, to __builtin_lasx_extract_128_hi_s and returns
   the builtin's result as __m128.
   NOTE(review): presumably yields the high half of _1 -- confirm. */
extern __inline
3944+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
3945+
__lasx_extract_128_hi_s(__m256 _1) {
3946+
return (__m128)__builtin_lasx_extract_128_hi_s((v8f32)_1);
3947+
}
3948+
3949+
/* Passes _1, viewed as v4f64, to __builtin_lasx_extract_128_hi_d and returns
   the builtin's result as __m128d.
   NOTE(review): presumably yields the high half of _1 -- confirm. */
extern __inline
3950+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
3951+
__lasx_extract_128_hi_d(__m256d _1) {
3952+
return (__m128d)__builtin_lasx_extract_128_hi_d((v4f64)_1);
3953+
}
3954+
3955+
/* Passes _1, viewed as v4i64, to __builtin_lasx_extract_128_hi and returns
   the builtin's result as __m128i.
   NOTE(review): presumably yields the high half of _1 -- confirm. */
extern __inline
3956+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
3957+
__lasx_extract_128_hi(__m256i _1) {
3958+
return (__m128i)__builtin_lasx_extract_128_hi((v4i64)_1);
3959+
}
3960+
3961+
/* Passes _1 (viewed as v8f32) and _2 (viewed as v4f32) to
   __builtin_lasx_insert_128_lo_s and returns the builtin's result as __m256.
   NOTE(review): presumably yields _1 with its low half replaced by _2 --
   confirm. */
extern __inline
3962+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
3963+
__lasx_insert_128_lo_s(__m256 _1, __m128 _2) {
3964+
return (__m256)__builtin_lasx_insert_128_lo_s((v8f32)_1, (v4f32)_2);
3965+
}
3966+
3967+
/* Passes _1 (viewed as v4f64) and _2 (viewed as v2f64) to
   __builtin_lasx_insert_128_lo_d and returns the builtin's result as __m256d.
   NOTE(review): presumably yields _1 with its low half replaced by _2 --
   confirm. */
extern __inline
3968+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3969+
__lasx_insert_128_lo_d(__m256d _1, __m128d _2) {
3970+
return (__m256d)__builtin_lasx_insert_128_lo_d((v4f64)_1, (v2f64)_2);
3971+
}
3972+
3973+
/* Passes _1 (viewed as v4i64) and _2 (viewed as v2i64) to
   __builtin_lasx_insert_128_lo and returns the builtin's result as __m256i.
   NOTE(review): presumably yields _1 with its low half replaced by _2 --
   confirm. */
extern __inline
3974+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3975+
__lasx_insert_128_lo(__m256i _1, __m128i _2) {
3976+
return (__m256i)__builtin_lasx_insert_128_lo((v4i64)_1, (v2i64)_2);
3977+
}
3978+
3979+
/* Passes _1 (viewed as v8f32) and _2 (viewed as v4f32) to
   __builtin_lasx_insert_128_hi_s and returns the builtin's result as __m256.
   NOTE(review): presumably yields _1 with its high half replaced by _2 --
   confirm. */
extern __inline
3980+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
3981+
__lasx_insert_128_hi_s(__m256 _1, __m128 _2) {
3982+
return (__m256)__builtin_lasx_insert_128_hi_s((v8f32)_1, (v4f32)_2);
3983+
}
3984+
3985+
/* Passes _1 (viewed as v4f64) and _2 (viewed as v2f64) to
   __builtin_lasx_insert_128_hi_d and returns the builtin's result as __m256d.
   NOTE(review): presumably yields _1 with its high half replaced by _2 --
   confirm. */
extern __inline
3986+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3987+
__lasx_insert_128_hi_d(__m256d _1, __m128d _2) {
3988+
return (__m256d)__builtin_lasx_insert_128_hi_d((v4f64)_1, (v2f64)_2);
3989+
}
3990+
3991+
/* Passes _1 (viewed as v4i64) and _2 (viewed as v2i64) to
   __builtin_lasx_insert_128_hi and returns the builtin's result as __m256i.
   NOTE(review): presumably yields _1 with its high half replaced by _2 --
   confirm. */
extern __inline
3992+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3993+
__lasx_insert_128_hi(__m256i _1, __m128i _2) {
3994+
return (__m256i)__builtin_lasx_insert_128_hi((v4i64)_1, (v2i64)_2);
3995+
}
3996+
3997+
#endif /* defined(__loongarch_asx_sx_conv). */
38853998
#endif /* defined(__loongarch_asx). */
38863999
#endif /* _LOONGSON_ASXINTRIN_H. */

clang/test/CodeGen/LoongArch/lasx/builtin-alias.c

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6384,3 +6384,156 @@ v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); }
63846384
// CHECK-NEXT: ret void
63856385
//
63866386
// Test wrapper: invokes __lasx_xvrepli_w with the constant 1 and returns the result.
v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); }
6387+
// CHECK-LABEL: @cast_128_s(
6388+
// CHECK-NEXT: entry:
6389+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6390+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> [[TMP0]])
6391+
// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6392+
// CHECK-NEXT: ret void
6393+
//
6394+
// Test wrapper: forwards _1 to __lasx_cast_128_s; the CHECK lines above expect
// a call to @llvm.loongarch.lasx.cast.128.s.
v8f32 cast_128_s(v4f32 _1) { return __lasx_cast_128_s(_1); }
6395+
// CHECK-LABEL: @cast_128_d(
6396+
// CHECK-NEXT: entry:
6397+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6398+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> [[TMP0]])
6399+
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6400+
// CHECK-NEXT: ret void
6401+
//
6402+
// Test wrapper: forwards _1 to __lasx_cast_128_d; the CHECK lines above expect
// a call to @llvm.loongarch.lasx.cast.128.d.
v4f64 cast_128_d(v2f64 _1) { return __lasx_cast_128_d(_1); }
6403+
// CHECK-LABEL: @cast_128(
6404+
// CHECK-NEXT: entry:
6405+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6406+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> [[TMP0]])
6407+
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6408+
// CHECK-NEXT: ret void
6409+
//
6410+
// Test wrapper: forwards _1 to __lasx_cast_128; the CHECK lines above expect
// a call to @llvm.loongarch.lasx.cast.128.
v4i64 cast_128(v2i64 _1) { return __lasx_cast_128(_1); }
6411+
// CHECK-LABEL: @concat_128_s(
6412+
// CHECK-NEXT: entry:
6413+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6414+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6415+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6416+
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6417+
// CHECK-NEXT: ret void
6418+
//
6419+
// Test wrapper: forwards _1 and _2 to __lasx_concat_128_s; the CHECK lines
// above expect a call to @llvm.loongarch.lasx.concat.128.s.
v8f32 concat_128_s(v4f32 _1, v4f32 _2) { return __lasx_concat_128_s(_1, _2); }
6420+
// CHECK-LABEL: @concat_128_d(
6421+
// CHECK-NEXT: entry:
6422+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6423+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6424+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6425+
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6426+
// CHECK-NEXT: ret void
6427+
//
6428+
// Test wrapper: forwards _1 and _2 to __lasx_concat_128_d; the CHECK lines
// above expect a call to @llvm.loongarch.lasx.concat.128.d.
v4f64 concat_128_d(v2f64 _1, v2f64 _2) { return __lasx_concat_128_d(_1, _2); }
6429+
// CHECK-LABEL: @concat_128(
6430+
// CHECK-NEXT: entry:
6431+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6432+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6433+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
6434+
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6435+
// CHECK-NEXT: ret void
6436+
//
6437+
// Test wrapper: forwards _1 and _2 to __lasx_concat_128; the CHECK lines
// above expect a call to @llvm.loongarch.lasx.concat.128.
v4i64 concat_128(v2i64 _1, v2i64 _2) { return __lasx_concat_128(_1, _2); }
6438+
// CHECK-LABEL: @extract_128_lo_s(
6439+
// CHECK-NEXT: entry:
6440+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6441+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> [[_1]])
6442+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
6443+
// CHECK-NEXT: ret i128 [[TMP2]]
6444+
//
6445+
// Test wrapper: forwards _1 to __lasx_extract_128_lo_s; the CHECK lines above
// expect a call to @llvm.loongarch.lasx.extract.128.lo.s.
v4f32 extract_128_lo_s(v8f32 _1) { return __lasx_extract_128_lo_s(_1); }
6446+
// CHECK-LABEL: @extract_128_lo_d(
6447+
// CHECK-NEXT: entry:
6448+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6449+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> [[_1]])
6450+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
6451+
// CHECK-NEXT: ret i128 [[TMP2]]
6452+
//
6453+
// Test wrapper: forwards _1 to __lasx_extract_128_lo_d; the CHECK lines above
// expect a call to @llvm.loongarch.lasx.extract.128.lo.d.
v2f64 extract_128_lo_d(v4f64 _1) { return __lasx_extract_128_lo_d(_1); }
6454+
// CHECK-LABEL: @extract_128_lo(
6455+
// CHECK-NEXT: entry:
6456+
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6457+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> [[_1]])
6458+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
6459+
// CHECK-NEXT: ret i128 [[TMP2]]
6460+
//
6461+
// Test wrapper: forwards _1 to __lasx_extract_128_lo; the CHECK lines above
// expect a call to @llvm.loongarch.lasx.extract.128.lo.
v2i64 extract_128_lo(v4i64 _1) { return __lasx_extract_128_lo(_1); }
6462+
// CHECK-LABEL: @extract_128_hi_s(
6463+
// CHECK-NEXT: entry:
6464+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6465+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> [[_1]])
6466+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
6467+
// CHECK-NEXT: ret i128 [[TMP2]]
6468+
//
6469+
// Test wrapper: forwards _1 to __lasx_extract_128_hi_s; the CHECK lines above
// expect a call to @llvm.loongarch.lasx.extract.128.hi.s.
v4f32 extract_128_hi_s(v8f32 _1) { return __lasx_extract_128_hi_s(_1); }
6470+
// CHECK-LABEL: @extract_128_hi_d(
6471+
// CHECK-NEXT: entry:
6472+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6473+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> [[_1]])
6474+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
6475+
// CHECK-NEXT: ret i128 [[TMP2]]
6476+
//
6477+
// Test wrapper: forwards _1 to __lasx_extract_128_hi_d; the CHECK lines above
// expect a call to @llvm.loongarch.lasx.extract.128.hi.d.
v2f64 extract_128_hi_d(v4f64 _1) { return __lasx_extract_128_hi_d(_1); }
6478+
// CHECK-LABEL: @extract_128_hi(
6479+
// CHECK-NEXT: entry:
6480+
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6481+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> [[_1]])
6482+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
6483+
// CHECK-NEXT: ret i128 [[TMP2]]
6484+
//
6485+
// Test wrapper: forwards _1 to __lasx_extract_128_hi; the CHECK lines above
// expect a call to @llvm.loongarch.lasx.extract.128.hi.
v2i64 extract_128_hi(v4i64 _1) { return __lasx_extract_128_hi(_1); }
6486+
// CHECK-LABEL: @insert_128_lo_s(
6487+
// CHECK-NEXT: entry:
6488+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6489+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6490+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
6491+
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6492+
// CHECK-NEXT: ret void
6493+
//
6494+
// Test wrapper: forwards _1 and _2 to __lasx_insert_128_lo_s; the CHECK lines
// above expect a call to @llvm.loongarch.lasx.insert.128.lo.s.
v8f32 insert_128_lo_s(v8f32 _1, v4f32 _2) { return __lasx_insert_128_lo_s(_1, _2); }
6495+
// CHECK-LABEL: @insert_128_lo_d(
6496+
// CHECK-NEXT: entry:
6497+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6498+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6499+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
6500+
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6501+
// CHECK-NEXT: ret void
6502+
//
6503+
// Test wrapper: forwards _1 and _2 to __lasx_insert_128_lo_d; the CHECK lines
// above expect a call to @llvm.loongarch.lasx.insert.128.lo.d.
v4f64 insert_128_lo_d(v4f64 _1, v2f64 _2) { return __lasx_insert_128_lo_d(_1, _2); }
6504+
// CHECK-LABEL: @insert_128_lo(
6505+
// CHECK-NEXT: entry:
6506+
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6507+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6508+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> [[_1]], <2 x i64> [[TMP1]])
6509+
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6510+
// CHECK-NEXT: ret void
6511+
//
6512+
// Test wrapper: forwards _1 and _2 to __lasx_insert_128_lo; the CHECK lines
// above expect a call to @llvm.loongarch.lasx.insert.128.lo.
v4i64 insert_128_lo(v4i64 _1, v2i64 _2) { return __lasx_insert_128_lo(_1, _2); }
6513+
// CHECK-LABEL: @insert_128_hi_s(
6514+
// CHECK-NEXT: entry:
6515+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6516+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6517+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
6518+
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6519+
// CHECK-NEXT: ret void
6520+
//
6521+
// Test wrapper: forwards _1 and _2 to __lasx_insert_128_hi_s; the CHECK lines
// above expect a call to @llvm.loongarch.lasx.insert.128.hi.s.
v8f32 insert_128_hi_s(v8f32 _1, v4f32 _2) { return __lasx_insert_128_hi_s(_1, _2); }
6522+
// CHECK-LABEL: @insert_128_hi_d(
6523+
// CHECK-NEXT: entry:
6524+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6525+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6526+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
6527+
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6528+
// CHECK-NEXT: ret void
6529+
//
6530+
// Test wrapper: forwards _1 and _2 to __lasx_insert_128_hi_d; the CHECK lines
// above expect a call to @llvm.loongarch.lasx.insert.128.hi.d.
v4f64 insert_128_hi_d(v4f64 _1, v2f64 _2) { return __lasx_insert_128_hi_d(_1, _2); }
6531+
// CHECK-LABEL: @insert_128_hi(
6532+
// CHECK-NEXT: entry:
6533+
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6534+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6535+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> [[_1]], <2 x i64> [[TMP1]])
6536+
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6537+
// CHECK-NEXT: ret void
6538+
//
6539+
// Test wrapper: forwards _1 and _2 to __lasx_insert_128_hi; the CHECK lines
// above expect a call to @llvm.loongarch.lasx.insert.128.hi.
v4i64 insert_128_hi(v4i64 _1, v2i64 _2) { return __lasx_insert_128_hi(_1, _2); }

0 commit comments

Comments
 (0)