Skip to content

Conversation

@chaitanyav
Copy link
Contributor

Resolves: #167476

@chaitanyav marked this pull request as ready for review on November 13, 2025 at 01:57
@llvmbot added the following labels on Nov 13, 2025: clang (Clang issues not falling into any other category); backend:X86; clang:frontend (Language frontend issues, e.g. anything involving "Sema"); clang:headers (Headers provided by Clang, e.g. for intrinsics); clang:bytecode (Issues for the clang bytecode constexpr interpreter)
@chaitanyav self-assigned this on Nov 13, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 13, 2025

@llvm/pr-subscribers-clang

Author: NagaChaitanya Vellanki (chaitanyav)

Changes

Resolves: #167476


The patch is 55.89 KiB and is truncated to 20.00 KiB below; the full version is at https://github.com/llvm/llvm-project/pull/167802.diff

14 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsX86.td (+13-10)
  • (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+44)
  • (modified) clang/lib/AST/ExprConstant.cpp (+64)
  • (modified) clang/lib/Headers/avx10_2_512bf16intrin.h (+1-1)
  • (modified) clang/lib/Headers/avx10_2bf16intrin.h (+2-2)
  • (modified) clang/lib/Headers/avx2intrin.h (+4-6)
  • (modified) clang/lib/Headers/avx512bwintrin.h (+7-11)
  • (modified) clang/lib/Headers/avx512fintrin.h (+27-38)
  • (modified) clang/lib/Headers/avx512vlbwintrin.h (+14-21)
  • (modified) clang/lib/Headers/avx512vlintrin.h (+22-32)
  • (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+11-2)
  • (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+46-10)
  • (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+37)
  • (modified) clang/test/CodeGen/X86/avx512vlbw-builtins.c (+21-4)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index cb08e2107f072..b261b681990e0 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -603,6 +603,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">; } +let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; +} + let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; @@ -617,9 +622,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; - def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; - def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; } @@ -3052,38 +3055,38 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512> def permdi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">; } -let Features = "avx512bw", 
Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def permvarhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def permvardf512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">; def permvardi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def permvarsf512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">; def permvarsi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; } -let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def permvarqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; } -let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def permvarqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; } -let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def permvarqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def permvarhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, 
short>, _Vector<8, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def permvarhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def permvardf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">; def permvardi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 6c7b2f502cc51..c72a3566681b1 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4414,6 +4414,50 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)}; } }); + case X86::BI__builtin_ia32_permvarsi256: + case X86::BI__builtin_ia32_permvarsf256: + case X86::BI__builtin_ia32_permvardf512: + case X86::BI__builtin_ia32_permvardi512: + case X86::BI__builtin_ia32_permvarhi128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x7; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi128: + case X86::BI__builtin_ia32_permvarhi256: + case X86::BI__builtin_ia32_permvarsi512: + case X86::BI__builtin_ia32_permvarsf512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0xF; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case 
X86::BI__builtin_ia32_permvardi256: + case X86::BI__builtin_ia32_permvardf256: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi256: + case X86::BI__builtin_ia32_permvarhi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1F; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3F; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); case X86::BI__builtin_ia32_vpermi2varq128: case X86::BI__builtin_ia32_vpermi2varpd128: return interp__builtin_ia32_shuffle_generic( diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 1bfea24b228e8..e9e448143477e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13551,6 +13551,70 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return false; return Success(R, E); } + case X86::BI__builtin_ia32_permvarsi256: + case X86::BI__builtin_ia32_permvarsf256: + case X86::BI__builtin_ia32_permvardf512: + case X86::BI__builtin_ia32_permvardi512: + case X86::BI__builtin_ia32_permvarhi128: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x7; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvarqi128: + case X86::BI__builtin_ia32_permvarhi256: + case X86::BI__builtin_ia32_permvarsi512: + case X86::BI__builtin_ia32_permvarsf512: { + APValue R; + if 
(!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0xF; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvardi256: + case X86::BI__builtin_ia32_permvardf256: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvarqi256: + case X86::BI__builtin_ia32_permvarhi512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1F; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvarqi512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3F; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } case X86::BI__builtin_ia32_vpermi2varq128: case X86::BI__builtin_ia32_vpermi2varpd128: { APValue R; diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h index 46ec12a63ef9c..3201307af4731 100644 --- a/clang/lib/Headers/avx10_2_512bf16intrin.h +++ b/clang/lib/Headers/avx10_2_512bf16intrin.h @@ -179,7 +179,7 @@ _mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) { (__v32hi)__B); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_pbh(__m512i __A, __m512bh __B) { return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h 
index 8fb8cd7cd0865..9f5b726d7b789 100644 --- a/clang/lib/Headers/avx10_2bf16intrin.h +++ b/clang/lib/Headers/avx10_2bf16intrin.h @@ -307,12 +307,12 @@ _mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) { (__v16hi)__B); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutexvar_pbh(__m128i __A, __m128bh __B) { return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutexvar_pbh(__m256i __A, __m256bh __B) { return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); } diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 3cbaaece7b38e..3e3c13d8bd662 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -3214,9 +3214,8 @@ _mm_broadcastq_epi64(__m128i __X) { /// A 256-bit vector of [8 x i32] containing indexes of values to use from /// \a __a. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); } @@ -3272,9 +3271,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] containing indexes of values to use from /// \a __a. /// \returns A 256-bit vector of [8 x float] containing the result. 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 4a02c96620335..3cfa32eb9e727 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1846,25 +1846,21 @@ _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) (__v32hi) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_epi16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_permutexvar_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_permutexvar_epi16(__A, __B), (__v32hi)__W); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 997e9608e112f..79c37173ac838 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -7959,93 +7959,82 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d 
__Y, __mmask8 __U) (__v8di)_mm512_permutex_epi64((X), (C)), \ (__v8di)_mm512_setzero_si512())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_pd (__m512i __X, __m512d __Y) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_pd(__m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, + __m512d __Y) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutexvar_pd(__X, __Y), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutexvar_pd(__X, __Y), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_permutexvar_epi64(__X, __Y), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, - __m512i __Y) -{ +static 
__inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, + __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_permutexvar_epi64(__X, __Y), (__v8di)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_ps (__m512i __X, __m512 __Y) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_ps(__m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutexvar_ps(__X, __Y), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutexvar_ps(__X, __Y), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X); } #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 
(__v16si)_mm512_permutexvar_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, - __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, + __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_permutexvar_epi32(__... [truncated] 
@llvmbot
Copy link
Member

llvmbot commented Nov 13, 2025

@llvm/pr-subscribers-backend-x86

Author: NagaChaitanya Vellanki (chaitanyav)

Changes

Resolves: #167476


Patch is 55.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167802.diff

14 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsX86.td (+13-10)
  • (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+44)
  • (modified) clang/lib/AST/ExprConstant.cpp (+64)
  • (modified) clang/lib/Headers/avx10_2_512bf16intrin.h (+1-1)
  • (modified) clang/lib/Headers/avx10_2bf16intrin.h (+2-2)
  • (modified) clang/lib/Headers/avx2intrin.h (+4-6)
  • (modified) clang/lib/Headers/avx512bwintrin.h (+7-11)
  • (modified) clang/lib/Headers/avx512fintrin.h (+27-38)
  • (modified) clang/lib/Headers/avx512vlbwintrin.h (+14-21)
  • (modified) clang/lib/Headers/avx512vlintrin.h (+22-32)
  • (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+11-2)
  • (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+46-10)
  • (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+37)
  • (modified) clang/test/CodeGen/X86/avx512vlbw-builtins.c (+21-4)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index cb08e2107f072..b261b681990e0 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -603,6 +603,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">; } +let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; +} + let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; @@ -617,9 +622,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; - def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; - def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; } @@ -3052,38 +3055,38 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512> def permdi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">; } -let Features = "avx512bw", 
Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def permvarhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def permvardf512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">; def permvardi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def permvarsf512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">; def permvarsi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; } -let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def permvarqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; } -let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def permvarqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; } -let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def permvarqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def permvarhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, 
short>, _Vector<8, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def permvarhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def permvardf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">; def permvardi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 6c7b2f502cc51..c72a3566681b1 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4414,6 +4414,50 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)}; } }); + case X86::BI__builtin_ia32_permvarsi256: + case X86::BI__builtin_ia32_permvarsf256: + case X86::BI__builtin_ia32_permvardf512: + case X86::BI__builtin_ia32_permvardi512: + case X86::BI__builtin_ia32_permvarhi128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x7; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi128: + case X86::BI__builtin_ia32_permvarhi256: + case X86::BI__builtin_ia32_permvarsi512: + case X86::BI__builtin_ia32_permvarsf512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0xF; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case 
X86::BI__builtin_ia32_permvardi256: + case X86::BI__builtin_ia32_permvardf256: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi256: + case X86::BI__builtin_ia32_permvarhi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1F; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); + case X86::BI__builtin_ia32_permvarqi512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3F; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + }); case X86::BI__builtin_ia32_vpermi2varq128: case X86::BI__builtin_ia32_vpermi2varpd128: return interp__builtin_ia32_shuffle_generic( diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 1bfea24b228e8..e9e448143477e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13551,6 +13551,70 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return false; return Success(R, E); } + case X86::BI__builtin_ia32_permvarsi256: + case X86::BI__builtin_ia32_permvarsf256: + case X86::BI__builtin_ia32_permvardf512: + case X86::BI__builtin_ia32_permvardi512: + case X86::BI__builtin_ia32_permvarhi128: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x7; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvarqi128: + case X86::BI__builtin_ia32_permvarhi256: + case X86::BI__builtin_ia32_permvarsi512: + case X86::BI__builtin_ia32_permvarsf512: { + APValue R; + if 
(!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0xF; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvardi256: + case X86::BI__builtin_ia32_permvardf256: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvarqi256: + case X86::BI__builtin_ia32_permvarhi512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x1F; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_permvarqi512: { + APValue R; + if (!evalShuffleGeneric(Info, E, R, + [](unsigned DstIdx, unsigned ShuffleMask) { + int Offset = ShuffleMask & 0x3F; + unsigned SrcIdx = 0; + return std::pair<unsigned, int>{SrcIdx, Offset}; + })) + return false; + return Success(R, E); + } case X86::BI__builtin_ia32_vpermi2varq128: case X86::BI__builtin_ia32_vpermi2varpd128: { APValue R; diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h index 46ec12a63ef9c..3201307af4731 100644 --- a/clang/lib/Headers/avx10_2_512bf16intrin.h +++ b/clang/lib/Headers/avx10_2_512bf16intrin.h @@ -179,7 +179,7 @@ _mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) { (__v32hi)__B); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_pbh(__m512i __A, __m512bh __B) { return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h 
index 8fb8cd7cd0865..9f5b726d7b789 100644 --- a/clang/lib/Headers/avx10_2bf16intrin.h +++ b/clang/lib/Headers/avx10_2bf16intrin.h @@ -307,12 +307,12 @@ _mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) { (__v16hi)__B); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutexvar_pbh(__m128i __A, __m128bh __B) { return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutexvar_pbh(__m256i __A, __m256bh __B) { return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); } diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 3cbaaece7b38e..3e3c13d8bd662 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -3214,9 +3214,8 @@ _mm_broadcastq_epi64(__m128i __X) { /// A 256-bit vector of [8 x i32] containing indexes of values to use from /// \a __a. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); } @@ -3272,9 +3271,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] containing indexes of values to use from /// \a __a. /// \returns A 256-bit vector of [8 x float] containing the result. 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 4a02c96620335..3cfa32eb9e727 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1846,25 +1846,21 @@ _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) (__v32hi) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_epi16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_permutexvar_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_permutexvar_epi16(__A, __B), (__v32hi)__W); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 997e9608e112f..79c37173ac838 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -7959,93 +7959,82 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d 
__Y, __mmask8 __U) (__v8di)_mm512_permutex_epi64((X), (C)), \ (__v8di)_mm512_setzero_si512())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_pd (__m512i __X, __m512d __Y) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_pd(__m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, + __m512d __Y) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutexvar_pd(__X, __Y), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutexvar_pd(__X, __Y), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_permutexvar_epi64(__X, __Y), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, - __m512i __Y) -{ +static 
__inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, + __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_permutexvar_epi64(__X, __Y), (__v8di)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_ps (__m512i __X, __m512 __Y) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_ps(__m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutexvar_ps(__X, __Y), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutexvar_ps(__X, __Y), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X); } #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 
(__v16si)_mm512_permutexvar_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, - __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, + __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_permutexvar_epi32(__... [truncated] 
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry - you missed the AVX2 tests.

_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
{
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test coverage?

… AVX512 permutexvar intrinsics to be used in constexpr

Resolves: llvm#167476
- Group permvarsi256/permvarsf256 with other AVX2 constexpr builtins
- Remove unnecessary SrcIdx variable and use zero directly in pair construction
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

@RKSimon RKSimon enabled auto-merge (squash) November 13, 2025 13:38
@RKSimon RKSimon merged commit 91a1bde into llvm:main Nov 13, 2025
9 of 10 checks passed
@chaitanyav chaitanyav deleted the issue_167476 branch November 30, 2025 21:00
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:X86 clang:bytecode Issues for the clang bytecode constexpr interpreter clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category

3 participants