Skip to content
6 changes: 3 additions & 3 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -3310,15 +3310,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;
}

let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpmultishiftqb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}

let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpmultishiftqb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
}

let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpmultishiftqb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
}

Expand Down
58 changes: 58 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3531,6 +3531,60 @@ static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
return true;
}

static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
assert(Call->getNumArgs() == 2);

QualType ATy = Call->getArg(0)->getType();
QualType BTy = Call->getArg(1)->getType();
if (!ATy->isVectorType() || !BTy->isVectorType()) {
return false;
}

const Pointer &BPtr = S.Stk.pop<Pointer>();
const Pointer &APtr = S.Stk.pop<Pointer>();
const auto *AVecT = ATy->castAs<VectorType>();
assert(AVecT->getNumElements() ==
BTy->castAs<VectorType>()->getNumElements());

PrimType ElemT = *S.getContext().classify(AVecT->getElementType());

unsigned NumBytesInQWord = 8;
unsigned NumBitsInByte = 8;
unsigned NumBytes = AVecT->getNumElements();
unsigned NumQWords = NumBytes / NumBytesInQWord;
const Pointer &Dst = S.Stk.peek<Pointer>();

for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
APInt BQWord(64, 0);
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
INT_TYPE_SWITCH(ElemT, {
uint64_t Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx));
BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
});
}

for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
uint64_t Ctrl = 0;
INT_TYPE_SWITCH(
ElemT, { Ctrl = static_cast<uint64_t>(APtr.elem<T>(Idx)) & 0x3F; });

APInt Byte(8, 0);
for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]);
}
INT_TYPE_SWITCH(ElemT,
{ Dst.elem<T>(Idx) = T::from(Byte.getZExtValue()); });
}
}

Dst.initializeAllElements();

return true;
}

bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
Expand Down Expand Up @@ -4756,6 +4810,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
});

case X86::BI__builtin_ia32_vpmultishiftqb128:
case X86::BI__builtin_ia32_vpmultishiftqb256:
case X86::BI__builtin_ia32_vpmultishiftqb512:
return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
Expand Down
39 changes: 39 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13096,6 +13096,45 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}

case X86::BI__builtin_ia32_vpmultishiftqb128:
case X86::BI__builtin_ia32_vpmultishiftqb256:
case X86::BI__builtin_ia32_vpmultishiftqb512: {
assert(E->getNumArgs() == 2);

APValue A, B;
if (!Evaluate(A, Info, E->getArg(0)) || !Evaluate(B, Info, E->getArg(1)))
return false;

assert(A.getVectorLength() == B.getVectorLength());
unsigned NumBytesInQWord = 8;
unsigned NumBitsInByte = 8;
unsigned NumBytes = A.getVectorLength();
unsigned NumQWords = NumBytes / NumBytesInQWord;
SmallVector<APValue, 64> Result;
Result.reserve(NumBytes);

for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
APInt BQWord(64, 0);
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
uint64_t Byte = B.getVectorElt(Idx).getInt().getZExtValue();
BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
}

for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
uint64_t Ctrl = A.getVectorElt(Idx).getInt().getZExtValue() & 0x3F;

APInt Byte(8, 0);
for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]);
}
Result.push_back(APValue(APSInt(Byte, /*isUnsigned*/ true)));
}
}
return Success(APValue(Result.data(), Result.size()), E);
}

case X86::BI__builtin_ia32_phminposuw128: {
APValue Source;
if (!Evaluate(Source, Info, E->getArg(0)))
Expand Down
38 changes: 17 additions & 21 deletions clang/lib/Headers/avx512vbmiintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,61 +15,57 @@
#define __VBMIINTRIN_H

/* Define the default attributes for the functions in this file. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \
__min_vector_width__(512)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
__min_vector_width__(512))) constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \
__min_vector_width__(512)))
#endif

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I,
(__v64qi) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I,
__m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi8(
__m512i __A, __mmask64 __U, __m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U,
__m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi8(
__m512i __A, __m512i __I, __mmask64 __U, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)__I);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I,
__m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi8(
__mmask64 __U, __m512i __A, __m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A,
__m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi8(
__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
(__v64qi)__W);
Expand Down Expand Up @@ -97,6 +93,6 @@ _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y)
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
(__v64qi)_mm512_setzero_si512());
}
#undef __DEFAULT_FN_ATTRS_CONSTEXPR

#undef __DEFAULT_FN_ATTRS
#endif
72 changes: 32 additions & 40 deletions clang/lib/Headers/avx512vbmivlintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@
#define __VBMIVLINTRIN_H

/* Define the default attributes for the functions in this file. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl"), \
__min_vector_width__(128))) constexpr
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl"), \
__min_vector_width__(256))) constexpr
#else
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl"), \
Expand All @@ -23,111 +33,96 @@
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl"), \
__min_vector_width__(256)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
(__v16qi)__I,
(__v16qi)__B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
__m128i __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8(
__m128i __A, __mmask16 __U, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
__m128i __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8(
__m128i __A, __m128i __I, __mmask16 __U, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)__I);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
__m128i __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8(
__mmask16 __U, __m128i __A, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
(__v32qi)__B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
__m256i __B) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8(
__m256i __A, __mmask32 __U, __m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
__m256i __B) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8(
__m256i __A, __m256i __I, __mmask32 __U, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)__I);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
__m256i __B) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8(
__mmask32 __U, __m256i __A, __m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutexvar_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_permutexvar_epi8(__A, __B),
(__v16qi)_mm_setzero_si128());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M, __m128i __A,
__m128i __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8(
__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_permutexvar_epi8(__A, __B),
(__v16qi)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
(__v32qi)_mm256_setzero_si256());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M, __m256i __A,
__m256i __B) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8(
__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
(__v32qi)__W);
Expand Down Expand Up @@ -179,9 +174,6 @@ _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
(__v32qi)_mm256_setzero_si256());
}

#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif
Loading
Loading