- Notifications
You must be signed in to change notification settings - Fork 15.3k
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 VPMULTISHIFTQB intrinsics to be used in constexpr #168995
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
🐧 Linux x64 Test Results
|
2808e74 to 4a5b92c Compare | @llvm/pr-subscribers-backend-x86 Author: NagaChaitanya Vellanki (chaitanyav) ChangesResolves: #167477 Patch is 32.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168995.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b760c3e06b8f7..93fb511a508f3 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3358,15 +3358,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">; } -let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpmultishiftqb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; } -let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpmultishiftqb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; } -let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vpmultishiftqb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index eba71d66bc4d6..5be31239fd597 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3468,6 +3468,65 @@ static bool interp__builtin_ia32_shuffle_generic( return true; } +static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 2); + + QualType ATy = Call->getArg(0)->getType(); + QualType BTy = Call->getArg(1)->getType(); + if (!ATy->isVectorType() || !BTy->isVectorType()) { + return false; + } + + const Pointer &BPtr = S.Stk.pop<Pointer>(); + const Pointer &APtr = S.Stk.pop<Pointer>(); + const auto *AVecT = ATy->castAs<VectorType>(); + const auto *BVecT = BTy->castAs<VectorType>(); + assert(AVecT->getNumElements() == BVecT->getNumElements()); + + PrimType ElemT = *S.getContext().classify(AVecT->getElementType()); + + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = AVecT->getNumElements(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + const Pointer &Dst = S.Stk.peek<Pointer>(); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + APInt AQWord(64, 0); + APInt BQWord(64, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; + uint64_t Byte = 0; + INT_TYPE_SWITCH(ElemT, { + Byte = static_cast<uint64_t>(APtr.elem<T>(Idx)); + AQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + + Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx)); + BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + }); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + uint64_t Ctrl = + AQWord.extractBits(8, ByteIdx * NumBitsInByte).getZExtValue() & 0x3F; + + APInt Byte(8, 0); + for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) { + Byte.insertBits(BQWord.extractBits(1, (Ctrl + BitIdx) & 0x3F), BitIdx); + } + INT_TYPE_SWITCH(ElemT, { + Dst.elem<T>(QWordId * NumBytesInQWord + ByteIdx) = + T::from(Byte.getZExtValue()); + }); + } + } + + Dst.initializeAllElements(); + + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -4669,6 +4728,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::make_pair(0, static_cast<int>(LaneOffset + Index)); }); + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: + return interp__builtin_ia32_multishiftqb(S, OpPC, Call); case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ce5301f17b3e7..21a664fefde49 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13062,6 +13062,51 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: { + assert(E->getNumArgs() == 2); + + APValue A, B; + if (!Evaluate(A, Info, E->getArg(0)) || !Evaluate(B, Info, E->getArg(1))) + return false; + + assert(A.getVectorLength() == B.getVectorLength()); + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = A.getVectorLength(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + SmallVector<APValue, 64> Result; + Result.reserve(NumBytes); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + APInt AQWord(64, 0); + APInt BQWord(64, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; + uint64_t Byte = A.getVectorElt(Idx).getInt().getZExtValue(); + AQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + + Byte = B.getVectorElt(Idx).getInt().getZExtValue(); + BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + uint64_t Ctrl = + AQWord.extractBits(8, ByteIdx * NumBitsInByte).getZExtValue() & + 0x3F; + + APInt Byte(8, 0); + for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) { + Byte.insertBits(BQWord.extractBits(1, (Ctrl + BitIdx) & 0x3F), + BitIdx); + } + Result.push_back(APValue(APSInt(Byte, /*isUnsigned*/ true))); + } + } + return Success(APValue(Result.data(), Result.size()), E); + } + case X86::BI__builtin_ia32_phminposuw128: { APValue Source; if (!Evaluate(Source, Info, E->getArg(0))) diff --git a/clang/lib/Headers/avx512vbmiintrin.h b/clang/lib/Headers/avx512vbmiintrin.h index 84fda5c5849e8..5ac78f0849c26 100644 --- a/clang/lib/Headers/avx512vbmiintrin.h +++ b/clang/lib/Headers/avx512vbmiintrin.h @@ -15,61 +15,57 @@ #define __VBMIINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \ - __min_vector_width__(512))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr + __min_vector_width__(512))) constexpr #else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \ + __min_vector_width__(512))) #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I, (__v64qi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I, - __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi8( + __m512i __A, __mmask64 __U, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U, - __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi8( + __m512i __A, __m512i __I, __mmask64 __U, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)__I); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I, - __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi8( + __mmask64 __U, __m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutexvar_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_permutexvar_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A, - __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi8( + __m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_permutexvar_epi8(__A, __B), (__v64qi)__W); @@ -97,6 +93,6 @@ _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) (__v64qi)_mm512_multishift_epi64_epi8(__X, __Y), (__v64qi)_mm512_setzero_si512()); } -#undef __DEFAULT_FN_ATTRS_CONSTEXPR + #undef __DEFAULT_FN_ATTRS #endif diff --git a/clang/lib/Headers/avx512vbmivlintrin.h b/clang/lib/Headers/avx512vbmivlintrin.h index 58a48dadff863..40a67bd63ca49 100644 --- a/clang/lib/Headers/avx512vbmivlintrin.h +++ b/clang/lib/Headers/avx512vbmivlintrin.h @@ -15,6 +15,16 @@ #define __VBMIVLINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vbmi,avx512vl"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vbmi,avx512vl"), \ + __min_vector_width__(256))) constexpr +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vbmi,avx512vl"), \ @@ -23,111 +33,96 @@ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vbmi,avx512vl"), \ __min_vector_width__(256))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr -#else -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 #endif -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, (__v16qi)__I, (__v16qi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, - __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8( + __m128i __A, __mmask16 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, - __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8( + __m128i __A, __m128i __I, __mmask16 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)__I); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, - __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8( + __mmask16 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, (__v32qi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, - __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8( + __m256i __A, __mmask32 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, - __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8( + __m256i __A, __m256i __I, __mmask32 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)__I); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, - __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8( + __mmask32 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_permutexvar_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M, __m128i __A, - __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8( + __m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_permutexvar_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi8(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_permutexvar_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M, __m256i __A, - __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8( + __m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_permutexvar_epi8(__A, __B), (__v32qi)__W); @@ -179,9 +174,6 @@ _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) (__v32qi)_mm256_setzero_si256()); } -#undef __DEFAULT_FN_ATTRS128_CONSTEXPR -#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 - #endif diff --git a/clang/test/CodeGen/X86/avx512vbmi-builtins.c b/clang/test/CodeGen/X86/avx512vbmi-builtins.c index 7d506db92faeb..fcce58b63737b 100644 --- a/clang/test/CodeGen/X86/avx512vbmi-builtins.c +++ b/clang/test/CodeGen/X86/avx512vbmi-builtins.c @@ -211,18 +211,145 @@ __m512i test_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512 // CHECK-LABEL: test_mm512_mask_multishift_epi64_epi8 // CHECK: call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} - return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y); + return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v64qu( + _mm512_mask_multishift_epi64_epi8( + (__m512i)(__v64qu){ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0... [truncated] |
| @llvm/pr-subscribers-clang Author: NagaChaitanya Vellanki (chaitanyav) ChangesResolves: #167477 Patch is 32.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168995.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b760c3e06b8f7..93fb511a508f3 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3358,15 +3358,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">; } -let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpmultishiftqb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; } -let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpmultishiftqb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; } -let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vpmultishiftqb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index eba71d66bc4d6..5be31239fd597 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3468,6 +3468,65 @@ static bool interp__builtin_ia32_shuffle_generic( return true; } +static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 2); + + QualType ATy = Call->getArg(0)->getType(); + QualType BTy = Call->getArg(1)->getType(); + if (!ATy->isVectorType() || !BTy->isVectorType()) { + return false; + } + + const Pointer &BPtr = S.Stk.pop<Pointer>(); + const Pointer &APtr = S.Stk.pop<Pointer>(); + const auto *AVecT = ATy->castAs<VectorType>(); + const auto *BVecT = BTy->castAs<VectorType>(); + assert(AVecT->getNumElements() == BVecT->getNumElements()); + + PrimType ElemT = *S.getContext().classify(AVecT->getElementType()); + + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = AVecT->getNumElements(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + const Pointer &Dst = S.Stk.peek<Pointer>(); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + APInt AQWord(64, 0); + APInt BQWord(64, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; + uint64_t Byte = 0; + INT_TYPE_SWITCH(ElemT, { + Byte = static_cast<uint64_t>(APtr.elem<T>(Idx)); + AQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + + Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx)); + BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + }); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + uint64_t Ctrl = + AQWord.extractBits(8, ByteIdx * NumBitsInByte).getZExtValue() & 0x3F; + + APInt Byte(8, 0); + for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) { + Byte.insertBits(BQWord.extractBits(1, (Ctrl + BitIdx) & 0x3F), BitIdx); + } + INT_TYPE_SWITCH(ElemT, { + Dst.elem<T>(QWordId * NumBytesInQWord + ByteIdx) = + T::from(Byte.getZExtValue()); + }); + } + } + + Dst.initializeAllElements(); + + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -4669,6 +4728,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::make_pair(0, static_cast<int>(LaneOffset + Index)); }); + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: + return interp__builtin_ia32_multishiftqb(S, OpPC, Call); case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ce5301f17b3e7..21a664fefde49 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13062,6 +13062,51 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: { + assert(E->getNumArgs() == 2); + + APValue A, B; + if (!Evaluate(A, Info, E->getArg(0)) || !Evaluate(B, Info, E->getArg(1))) + return false; + + assert(A.getVectorLength() == B.getVectorLength()); + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = A.getVectorLength(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + SmallVector<APValue, 64> Result; + Result.reserve(NumBytes); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + APInt AQWord(64, 0); + APInt BQWord(64, 0); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; + uint64_t Byte = A.getVectorElt(Idx).getInt().getZExtValue(); + AQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + + Byte = B.getVectorElt(Idx).getInt().getZExtValue(); + BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); + } + + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + uint64_t Ctrl = + AQWord.extractBits(8, ByteIdx * NumBitsInByte).getZExtValue() & + 0x3F; + + APInt Byte(8, 0); + for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) { + Byte.insertBits(BQWord.extractBits(1, (Ctrl + BitIdx) & 0x3F), + BitIdx); + } + Result.push_back(APValue(APSInt(Byte, /*isUnsigned*/ true))); + } + } + return Success(APValue(Result.data(), Result.size()), E); + } + case X86::BI__builtin_ia32_phminposuw128: { APValue Source; if (!Evaluate(Source, Info, E->getArg(0))) diff --git a/clang/lib/Headers/avx512vbmiintrin.h b/clang/lib/Headers/avx512vbmiintrin.h index 84fda5c5849e8..5ac78f0849c26 100644 --- a/clang/lib/Headers/avx512vbmiintrin.h +++ b/clang/lib/Headers/avx512vbmiintrin.h @@ -15,61 +15,57 @@ #define __VBMIINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \ - __min_vector_width__(512))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr + __min_vector_width__(512))) constexpr #else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \ + __min_vector_width__(512))) #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I, (__v64qi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I, - __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi8( + __m512i __A, __mmask64 __U, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U, - __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi8( + __m512i __A, __m512i __I, __mmask64 __U, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)__I); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I, - __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi8( + __mmask64 __U, __m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutexvar_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_permutexvar_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A, - __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi8( + __m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_permutexvar_epi8(__A, __B), (__v64qi)__W); @@ -97,6 +93,6 @@ _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) (__v64qi)_mm512_multishift_epi64_epi8(__X, __Y), (__v64qi)_mm512_setzero_si512()); } -#undef __DEFAULT_FN_ATTRS_CONSTEXPR + #undef __DEFAULT_FN_ATTRS #endif diff --git a/clang/lib/Headers/avx512vbmivlintrin.h b/clang/lib/Headers/avx512vbmivlintrin.h index 58a48dadff863..40a67bd63ca49 100644 --- a/clang/lib/Headers/avx512vbmivlintrin.h +++ b/clang/lib/Headers/avx512vbmivlintrin.h @@ -15,6 +15,16 @@ #define __VBMIVLINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vbmi,avx512vl"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vbmi,avx512vl"), \ + __min_vector_width__(256))) constexpr +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vbmi,avx512vl"), \ @@ -23,111 +33,96 @@ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vbmi,avx512vl"), \ __min_vector_width__(256))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr -#else -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 #endif -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, (__v16qi)__I, (__v16qi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, - __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8( + __m128i __A, __mmask16 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, - __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8( + __m128i __A, __m128i __I, __mmask16 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)__I); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, - __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8( + __mmask16 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, (__v32qi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, - __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8( + __m256i __A, __mmask32 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, - __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8( + __m256i __A, __m256i __I, __mmask32 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)__I); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, - __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8( + __mmask32 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_permutexvar_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M, __m128i __A, - __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8( + __m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_permutexvar_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi8(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_permutexvar_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M, __m256i __A, - __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8( + __m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_permutexvar_epi8(__A, __B), (__v32qi)__W); @@ -179,9 +174,6 @@ _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) (__v32qi)_mm256_setzero_si256()); } -#undef __DEFAULT_FN_ATTRS128_CONSTEXPR -#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 - #endif diff --git a/clang/test/CodeGen/X86/avx512vbmi-builtins.c b/clang/test/CodeGen/X86/avx512vbmi-builtins.c index 7d506db92faeb..fcce58b63737b 100644 --- a/clang/test/CodeGen/X86/avx512vbmi-builtins.c +++ b/clang/test/CodeGen/X86/avx512vbmi-builtins.c @@ -211,18 +211,145 @@ __m512i test_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512 // CHECK-LABEL: test_mm512_mask_multishift_epi64_epi8 // CHECK: call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} - return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y); + return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y); } +TEST_CONSTEXPR(match_v64qu( + _mm512_mask_multishift_epi64_epi8( + (__m512i)(__v64qu){ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0... [truncated] |
4831cf9 to 3e26e39 Compare | @RKSimon attaching the fuzz test results file |
| const Pointer &APtr = S.Stk.pop<Pointer>(); | ||
| const auto *AVecT = ATy->castAs<VectorType>(); | ||
| const auto *BVecT = BTy->castAs<VectorType>(); | ||
| assert(AVecT->getNumElements() == BVecT->getNumElements()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Stop unused variable warnings on non-assert builds:
| assert(AVecT->getNumElements() == BVecT->getNumElements()); | |
| assert(AVecT->getNumElements() == BTy->castAs<VectorType>()->getNumElements()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
understood, i will update the assertion.
📦 Installing… ninja: Entering directory `/home/naga/llvm-project/build' [1205/4534] Building CXX object tools/clang/lib/AST/CMakeFiles/obj.clangAST.dir/ByteCode/InterpBuiltin.cpp.o /home/naga/llvm-project/clang/lib/AST/ByteCode/InterpBuiltin.cpp:3547:15: warning: unused variable 'BVecT' [-Wunused-variable] 3547 | const auto *BVecT = BTy->castAs<VectorType>(); | ^~~~~3e26e39 to 439aa6a Compare | | ||
| APInt Byte(8, 0); | ||
| for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) { | ||
| Byte.insertBits(BQWord.extractBits(1, (Ctrl + BitIdx) & 0x3F), BitIdx); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be:
Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]); | | ||
| for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { | ||
| uint64_t Ctrl = | ||
| AQWord.extractBits(8, ByteIdx * NumBitsInByte).getZExtValue() & 0x3F; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we gain anything from creating/extraction from AQWord instead of just doing this directly?
uint64_t Ctrl = static_cast<uint64_t>(APtr.elem<T>(QWordId * NumBytesInQWord + ByteIdx)) & 0x3F; That's closer to what we do for interp__builtin_ia32_shufbitqmb_mask
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
made the changes to both the files
439aa6a to d5d96ec Compare | verified that everything is passing in the fuzz testing also. |
| Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]); | ||
| } | ||
| INT_TYPE_SWITCH(ElemT, { | ||
| Dst.elem<T>(QWordId * NumBytesInQWord + ByteIdx) = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hoist this unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; - we now repeat it.
clang/lib/AST/ExprConstant.cpp Outdated
| } | ||
| | ||
| for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { | ||
| uint64_t Ctrl = A.getVectorElt(QWordId * NumBytesInQWord + ByteIdx) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hoist unsigned Idx = QWordId * NumBytesInQWord + ByteIdx; - it should help with the clang-format line wrapping
… AVX512 VPMULTISHIFTQB intrinsics to be used in constexpr Resolves:llvm#167477
* Unnecessary AQWord extraction when we can directly extract ctrl from A. * Use setBitVal instead of insertBits(extractBits)
* Use tmp variable to hold the inedx of the byte to avoid duplication and help with clang-format
d5d96ec to 2c67a38 Compare
RKSimon left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
Resolves: #167477