llvm · RKSimon · Oct 13, 2025 · Oct 2, 2025 · Oct 2, 2025 · Oct 3, 2025
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
@@ -323,14 +323,22 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
  def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
  def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
  def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
- def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
- def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
+ "_Vector<2,double>, _Constant char)">;
  def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
  def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
 }
 
+let Features = "sse4.1",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def ptestz128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def ptestc128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def ptestnzc128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
 let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
  def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
  def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -520,8 +528,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
  def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
 }
 
-
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx",
+  Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
  def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
  def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
  def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
@@ -530,7 +538,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
  def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
 }
 
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
  def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
  def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
  def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
@@ -540,6 +549,10 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
  def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
  def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
  def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx",
+ Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
  def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
  def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
 }

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2756,6 +2756,45 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
  return true;
 }
 
+static bool interp__builtin_ia32_test_op(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+
+ assert(LHS.getNumElems() == RHS.getNumElems());
+
+ unsigned SourceLen = LHS.getNumElems();
+ QualType ElemQT = getElemType(LHS);
+ OptPrimType ElemPT = S.getContext().classify(ElemQT);
+ unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
+
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane;
+ APInt BLane;
+
+ if (ElemQT->isIntegerType()) { // Get value.
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ ALane = LHS.elem<T>(I).toAPSInt();
+ BLane = RHS.elem<T>(I).toAPSInt();
+ });
+ } else if (ElemQT->isFloatingType()) { // Get only sign bit.
+ using T = PrimConv<PT_Float>::T;
+ ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
+ BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
+ } else { // Must be integer or floating type.
+ return false;
+ }
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
+ }
+ pushInteger(S, Fn(AWide, BWide), Call->getType());
+ return true;
+}
+
 static bool interp__builtin_elementwise_triop(
  InterpState &S, CodePtr OpPC, const CallExpr *Call,
  llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3712,7 +3751,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
  S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
  return ((APInt)C).isNegative() ? T : F;
  });
-
+ case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz256:
+ case X86::BI__builtin_ia32_vtestzps:
+ case X86::BI__builtin_ia32_vtestzps256:
+ case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call,
+ [](const APInt &A, const APInt &B) { return (A & B) == 0; });
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256:
+ case X86::BI__builtin_ia32_vtestcps:
+ case X86::BI__builtin_ia32_vtestcps256:
+ case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call,
+ [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256:
+ case X86::BI__builtin_ia32_vtestnzcps:
+ case X86::BI__builtin_ia32_vtestnzcps256:
+ case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call, [](const APInt &A, const APInt &B) {
+ return ((A & B) != 0) && ((~A & B) != 0);
+ });
  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
@@ -13905,6 +13905,40 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
 
 bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
  unsigned BuiltinOp) {
+ auto EvalTestOp = [&](llvm::function_ref<bool(const APInt &, const APInt &)>
+ Fn) {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
+ QualType ElemQT = VT->getElementType();
+ unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
+
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane;
+ APInt BLane;
+ if (ElemQT->isIntegerType()) { // Get value.
+ ALane = SourceLHS.getVectorElt(I).getInt();
+ BLane = SourceRHS.getVectorElt(I).getInt();
+ } else if (ElemQT->isFloatingType()) { // Get only sign bit.
+ ALane =
+ SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt().isNegative();
+ BLane =
+ SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt().isNegative();
+ } else { // Must be integer or floating type.
+ return false;
+ }
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
+ }
+ return Success(Fn(AWide, BWide), E);
+ };
 
  auto HandleMaskBinOp =
  [&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
@@ -15018,7 +15052,34 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
  Result.setBitVal(P++, Val[I]);
  return Success(Result, E);
  }
-
+ case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz256:
+ case X86::BI__builtin_ia32_vtestzps:
+ case X86::BI__builtin_ia32_vtestzps256:
+ case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd256: {
+ return EvalTestOp(
+ [](const APInt &A, const APInt &B) { return (A & B) == 0; });
+ }
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256:
+ case X86::BI__builtin_ia32_vtestcps:
+ case X86::BI__builtin_ia32_vtestcps256:
+ case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd256: {
+ return EvalTestOp(
+ [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
+ }
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256:
+ case X86::BI__builtin_ia32_vtestnzcps:
+ case X86::BI__builtin_ia32_vtestnzcps256:
+ case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd256: {
+ return EvalTestOp([](const APInt &A, const APInt &B) {
+ return ((A & B) != 0) && ((~A & B) != 0);
+ });
+ }
  case X86::BI__builtin_ia32_kandqi:
  case X86::BI__builtin_ia32_kandhi:
  case X86::BI__builtin_ia32_kandsi:

diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
@@ -2539,9 +2539,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) {
 /// \param __b
 /// A 128-bit vector of [2 x double].
 /// \returns the ZF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a,
+ __m128d __b) {
  return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -2568,9 +2567,8 @@ _mm_testz_pd(__m128d __a, __m128d __b)
 /// \param __b
 /// A 128-bit vector of [2 x double].
 /// \returns the CF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a,
+ __m128d __b) {
  return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -2598,9 +2596,8 @@ _mm_testc_pd(__m128d __a, __m128d __b)
 /// \param __b
 /// A 128-bit vector of [2 x double].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_testnzc_pd(__m128d __a, __m128d __b) {
  return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -2627,9 +2624,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b)
 /// \param __b
 /// A 128-bit vector of [4 x float].
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a,
+ __m128 __b) {
  return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -2656,9 +2652,8 @@ _mm_testz_ps(__m128 __a, __m128 __b)
 /// \param __b
 /// A 128-bit vector of [4 x float].
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a,
+ __m128 __b) {
  return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -2686,9 +2681,8 @@ _mm_testc_ps(__m128 __a, __m128 __b)
 /// \param __b
 /// A 128-bit vector of [4 x float].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a,
+ __m128 __b) {
  return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -2715,9 +2709,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b)
 /// \param __b
 /// A 256-bit vector of [4 x double].
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a,
+ __m256d __b) {
  return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -2744,9 +2737,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b)
 /// \param __b
 /// A 256-bit vector of [4 x double].
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a,
+ __m256d __b) {
  return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -2774,9 +2766,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b)
 /// \param __b
 /// A 256-bit vector of [4 x double].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_pd(__m256d __a, __m256d __b) {
  return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -2803,9 +2794,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b)
 /// \param __b
 /// A 256-bit vector of [8 x float].
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a,
+ __m256 __b) {
  return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -2832,9 +2822,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b)
 /// \param __b
 /// A 256-bit vector of [8 x float].
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a,
+ __m256 __b) {
  return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -2862,9 +2851,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b)
 /// \param __b
 /// A 256-bit vector of [8 x float].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a,
+ __m256 __b) {
  return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -2888,9 +2876,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b)
 /// \param __b
 /// A 256-bit integer vector.
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testz_si256(__m256i __a, __m256i __b) {
  return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
 }
 
@@ -2914,9 +2901,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b)
 /// \param __b
 /// A 256-bit integer vector.
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testc_si256(__m256i __a, __m256i __b) {
  return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
 }
 
@@ -2941,9 +2927,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b)
 /// \param __b
 /// A 256-bit integer vector.
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_si256(__m256i __a, __m256i __b) {
  return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
 }