[clang][LoongArch] Introduce LASX and LSX conversion intrinsics #157819
Merged
+482 −0
Conversation
@llvm/pr-subscribers-backend-loongarch @llvm/pr-subscribers-clang

Author: hev (heiher)

Changes

This patch introduces the LASX and LSX conversion intrinsics.
Patch is 25.73 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/157819.diff

4 Files Affected:
- clang/include/clang/Basic/BuiltinsLoongArchLASX.def
- clang/lib/Headers/lasxintrin.h
- clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
- clang/test/CodeGen/LoongArch/lasx/builtin.c
diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
index c4ea46a3bc5b5..b234dedad648e 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
@@ -986,3 +986,22 @@ TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_cast_128_s, "V8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_cast_128_d, "V4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_cast_128, "V32ScV16Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128_s, "V8fV4fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128_d, "V4dV2dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_concat_128, "V32ScV16ScV16Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo_s, "V4fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo_d, "V2dV4d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_lo, "V16ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi_s, "V4fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi_d, "V2dV4d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_extract_128_hi, "V16ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo_s, "V8fV8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo_d, "V4dV4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_lo, "V32ScV32ScV16Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi_s, "V8fV8fV4f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi_d, "V4dV4dV2d", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_insert_128_hi, "V32ScV32ScV16Sc", "nc", "lasx")
diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h
index 85020d82829e2..6dd8ac24ed46d 100644
--- a/clang/lib/Headers/lasxintrin.h
+++ b/clang/lib/Headers/lasxintrin.h
@@ -10,6 +10,8 @@
 #ifndef _LOONGSON_ASXINTRIN_H
 #define _LOONGSON_ASXINTRIN_H 1
 
+#include <lsxintrin.h>
+
 #if defined(__loongarch_asx)
 
 typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
@@ -3882,5 +3884,113 @@ extern __inline
 #define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
 
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_cast_128_s(__m128 _1) {
+  return (__m256)__builtin_lasx_cast_128_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_cast_128_d(__m128d _1) {
+  return (__m256d)__builtin_lasx_cast_128_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_cast_128(__m128i _1) {
+  return (__m256i)__builtin_lasx_cast_128((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_concat_128_s(__m128 _1, __m128 _2) {
+  return (__m256)__builtin_lasx_concat_128_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_concat_128_d(__m128d _1, __m128d _2) {
+  return (__m256d)__builtin_lasx_concat_128_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_concat_128(__m128i _1, __m128i _2) {
+  return (__m256i)__builtin_lasx_concat_128((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lasx_extract_128_lo_s(__m256 _1) {
+  return (__m128)__builtin_lasx_extract_128_lo_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lasx_extract_128_lo_d(__m256d _1) {
+  return (__m128d)__builtin_lasx_extract_128_lo_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lasx_extract_128_lo(__m256i _1) {
+  return (__m128i)__builtin_lasx_extract_128_lo((v32i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lasx_extract_128_hi_s(__m256 _1) {
+  return (__m128)__builtin_lasx_extract_128_hi_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lasx_extract_128_hi_d(__m256d _1) {
+  return (__m128d)__builtin_lasx_extract_128_hi_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lasx_extract_128_hi(__m256i _1) {
+  return (__m128i)__builtin_lasx_extract_128_hi((v32i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_insert_128_lo_s(__m256 _1, __m128 _2) {
+  return (__m256)__builtin_lasx_insert_128_lo_s((v8f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_insert_128_lo_d(__m256d _1, __m128d _2) {
+  return (__m256d)__builtin_lasx_insert_128_lo_d((v4f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_insert_128_lo(__m256i _1, __m128i _2) {
+  return (__m256i)__builtin_lasx_insert_128_lo((v32i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_insert_128_hi_s(__m256 _1, __m128 _2) {
+  return (__m256)__builtin_lasx_insert_128_hi_s((v8f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_insert_128_hi_d(__m256d _1, __m128d _2) {
+  return (__m256d)__builtin_lasx_insert_128_hi_d((v4f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_insert_128_hi(__m256i _1, __m128i _2) {
+  return (__m256i)__builtin_lasx_insert_128_hi((v32i8)_1, (v16i8)_2);
+}
+
 #endif /* defined(__loongarch_asx). */
 #endif /* _LOONGSON_ASXINTRIN_H. */
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
index 9a8ce224bcfd0..6ac9367bf1cf3 100644
--- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
@@ -6384,3 +6384,156 @@ v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); }
 // CHECK-NEXT: ret void
 //
 v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); }
+// CHECK-LABEL: @cast_128_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 cast_128_s(v4f32 _1) { return __builtin_lasx_cast_128_s(_1); }
+// CHECK-LABEL: @cast_128_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 cast_128_d(v2f64 _1) { return __builtin_lasx_cast_128_d(_1); }
+// CHECK-LABEL: @cast_128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.cast.128(<16 x i8> [[TMP0]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 cast_128(v16i8 _1) { return __builtin_lasx_cast_128(_1); }
+// CHECK-LABEL: @concat_128_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 concat_128_s(v4f32 _1, v4f32 _2) { return __builtin_lasx_concat_128_s(_1, _2); }
+// CHECK-LABEL: @concat_128_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 concat_128_d(v2f64 _1, v2f64 _2) { return __builtin_lasx_concat_128_d(_1, _2); }
+// CHECK-LABEL: @concat_128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.concat.128(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 concat_128(v16i8 _1, v16i8 _2) { return __builtin_lasx_concat_128(_1, _2); }
+// CHECK-LABEL: @extract_128_lo_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 extract_128_lo_s(v8f32 _1) { return __builtin_lasx_extract_128_lo_s(_1); }
+// CHECK-LABEL: @extract_128_lo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 extract_128_lo_d(v4f64 _1) { return __builtin_lasx_extract_128_lo_d(_1); }
+// CHECK-LABEL: @extract_128_lo(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lasx.extract.128.lo(<32 x i8> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 extract_128_lo(v32i8 _1) { return __builtin_lasx_extract_128_lo(_1); }
+// CHECK-LABEL: @extract_128_hi_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 extract_128_hi_s(v8f32 _1) { return __builtin_lasx_extract_128_hi_s(_1); }
+// CHECK-LABEL: @extract_128_hi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 extract_128_hi_d(v4f64 _1) { return __builtin_lasx_extract_128_hi_d(_1); }
+// CHECK-LABEL: @extract_128_hi(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lasx.extract.128.hi(<32 x i8> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 extract_128_hi(v32i8 _1) { return __builtin_lasx_extract_128_hi(_1); }
+// CHECK-LABEL: @insert_128_lo_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 insert_128_lo_s(v8f32 _1, v4f32 _2) { return __builtin_lasx_insert_128_lo_s(_1, _2); }
+// CHECK-LABEL: @insert_128_lo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 insert_128_lo_d(v4f64 _1, v2f64 _2) { return __builtin_lasx_insert_128_lo_d(_1, _2); }
+// CHECK-LABEL: @insert_128_lo(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.insert.128.lo(<32 x i8> [[_1]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 insert_128_lo(v32i8 _1, v16i8 _2) { return __builtin_lasx_insert_128_lo(_1, _2); }
+// CHECK-LABEL: @insert_128_hi_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 insert_128_hi_s(v8f32 _1, v4f32 _2) { return __builtin_lasx_insert_128_hi_s(_1, _2); }
+// CHECK-LABEL: @insert_128_hi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 insert_128_hi_d(v4f64 _1, v2f64 _2) { return __builtin_lasx_insert_128_hi_d(_1, _2); }
+// CHECK-LABEL: @insert_128_hi(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.insert.128.hi(<32 x i8> [[_1]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 insert_128_hi(v32i8 _1, v16i8 _2) { return __builtin_lasx_insert_128_hi(_1, _2); }
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c
index f52a23a5faea7..d0dde9f8d3d8b 100644
--- a/clang/test/CodeGen/LoongArch/lasx/builtin.c
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c
@@ -1,6 +1,10 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s
 
+typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
+typedef float v4f32 __attribute__((vector_size(16), aligned(16)));
+typedef double v2f64 __attribute__((vector_size(16), aligned(16)));
+
 typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
 typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1)));
 typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32)));
@@ -6406,3 +6410,156 @@ v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); }
 // CHECK-NEXT: ret void
 //
 v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); }
+// CHECK-LABEL: @cast_128_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 cast_128_s(v4f32 _1) { return __builtin_lasx_cast_128_s(_1); }
+// CHECK-LABEL: @cast_128_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 cast_128_d(v2f64 _1) { return __builtin_lasx_cast_128_d(_1); }
+// CHECK-LABEL: @cast_128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.cast.128(<16 x i8> [[TMP0]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 cast_128(v16i8 _1) { return __builtin_lasx_cast_128(_1); }
+// CHECK-LABEL: @concat_128_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 concat_128_s(v4f32 _1, v4f32 _2) { return __builtin_lasx_concat_128_s(_1, _2); }
+// CHECK-LABEL: @concat_128_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 concat_128_d(v2f64 _1, v2f64 _2) { return __builtin_lasx_concat_128_d(_1, _2); }
+// CHECK-LABEL: @concat_128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.concat.128(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 concat_128(v16i8 _1, v16i8 _2) { return __builtin_lasx_concat_128(_1, _2); }
+// CHECK-LABEL: @extract_128_lo_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 extract_128_lo_s... [truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
This patch introduces the LASX and LSX conversion intrinsics:

- __m256 __lasx_cast_128_s (__m128)
- __m256d __lasx_cast_128_d (__m128d)
- __m256i __lasx_cast_128 (__m128i)
- __m256 __lasx_concat_128_s (__m128, __m128)
- __m256d __lasx_concat_128_d (__m128d, __m128d)
- __m256i __lasx_concat_128 (__m128i, __m128i)
- __m128 __lasx_extract_128_lo_s (__m256)
- __m128d __lasx_extract_128_lo_d (__m256d)
- __m128i __lasx_extract_128_lo (__m256i)
- __m128 __lasx_extract_128_hi_s (__m256)
- __m128d __lasx_extract_128_hi_d (__m256d)
- __m128i __lasx_extract_128_hi (__m256i)
- __m256 __lasx_insert_128_lo_s (__m256, __m128)
- __m256d __lasx_insert_128_lo_d (__m256d, __m128d)
- __m256i __lasx_insert_128_lo (__m256i, __m128i)
- __m256 __lasx_insert_128_hi_s (__m256, __m128)
- __m256d __lasx_insert_128_hi_d (__m256d, __m128d)
- __m256i __lasx_insert_128_hi (__m256i, __m128i)
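To give a sense of how the single-precision variants compose, here is a minimal usage sketch. It is illustrative only: the pack_halves/unpack_halves names and the widen-then-overwrite pattern are mine, not code from the patch, and the assumption that the cast intrinsic places its argument in the low half follows the AVX-style cast convention rather than anything stated in this PR; only the __lasx_* wrappers and the __m128/__m256 types come from the patched headers.

#include <lasxintrin.h> /* after this patch, also pulls in lsxintrin.h */
/* build sketch, e.g.: clang --target=loongarch64-unknown-linux-gnu -mlasx -c pack.c */

/* Build a 256-bit LASX value from two 128-bit LSX values: widen the first
   (assumed to land in the low half), then overwrite the high half. */
static __m256 pack_halves(__m128 lo, __m128 hi) {
  __m256 wide = __lasx_cast_128_s(lo);
  return __lasx_insert_128_hi_s(wide, hi);
}

/* Split a 256-bit LASX value back into its two 128-bit halves. */
static void unpack_halves(__m256 v, __m128 *lo, __m128 *hi) {
  *lo = __lasx_extract_128_lo_s(v);
  *hi = __lasx_extract_128_hi_s(v);
}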
SixWeining approved these changes Nov 24, 2025
aadeshps-mcw pushed a commit to aadeshps-mcw/llvm-project that referenced this pull request Nov 26, 2025
Priyanshu3820 pushed a commit to Priyanshu3820/llvm-project that referenced this pull request Nov 26, 2025
Labels
- backend:loongarch
- backend:X86
- clang:frontend (Language frontend issues, e.g. anything involving "Sema")
- clang:headers (Headers provided by Clang, e.g. for intrinsics)
- clang (Clang issues not falling into any other category)
This patch introduces the LASX and LSX conversion intrinsics:

- __m256 __lasx_cast_128_s (__m128)
- __m256d __lasx_cast_128_d (__m128d)
- __m256i __lasx_cast_128 (__m128i)
- __m256 __lasx_concat_128_s (__m128, __m128)
- __m256d __lasx_concat_128_d (__m128d, __m128d)
- __m256i __lasx_concat_128 (__m128i, __m128i)
- __m128 __lasx_extract_128_lo_s (__m256)
- __m128d __lasx_extract_128_lo_d (__m256d)
- __m128i __lasx_extract_128_lo (__m256i)
- __m128 __lasx_extract_128_hi_s (__m256)
- __m128d __lasx_extract_128_hi_d (__m256d)
- __m128i __lasx_extract_128_hi (__m256i)
- __m256 __lasx_insert_128_lo_s (__m256, __m128)
- __m256d __lasx_insert_128_lo_d (__m256d, __m128d)
- __m256i __lasx_insert_128_lo (__m256i, __m128i)
- __m256 __lasx_insert_128_hi_s (__m256, __m128)
- __m256d __lasx_insert_128_hi_d (__m256d, __m128d)
- __m256i __lasx_insert_128_hi (__m256i, __m128i)
Relevant GCC patch:
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c2013267642fea4a6e89b826940c8aa80a76089d
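For the integer variants, the point of these conversions is to let code run one 256-bit operation over data that arrives as two 128-bit pieces. The sketch below is a hedged illustration, not code from the patch: add_pairwise is an invented name, __lasx_xvadd_b is assumed to be the pre-existing LASX byte-add wrapper from lasxintrin.h, and the lane order of __lasx_concat_128 (first argument to the low half, second to the high half) is an assumption by analogy with similar concat intrinsics.

#include <lasxintrin.h>

/* Add two pairs of 128-bit byte vectors with a single 256-bit add. */
static void add_pairwise(__m128i a_lo, __m128i a_hi, __m128i b_lo, __m128i b_hi,
                         __m128i *out_lo, __m128i *out_hi) {
  __m256i a = __lasx_concat_128(a_lo, a_hi);   /* assumed: _1 -> low half, _2 -> high half */
  __m256i b = __lasx_concat_128(b_lo, b_hi);
  __m256i sum = __lasx_xvadd_b(a, b);          /* pre-existing LASX intrinsic (assumed) */
  *out_lo = __lasx_extract_128_lo(sum);
  *out_hi = __lasx_extract_128_hi(sum);
}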