Skip to content

Conversation

@leandrolcampos
Copy link
Contributor

This patch adds a set of randomized conformance tests for double-precision math functions.

The functions included in this set were selected based on the following criteria:

  • An implementation exists in libc/src/math/generic (i.e., it is not just a wrapper around a compiler built-in).
  • The corresponding LLVM CPU libm implementation is correctly rounded.
  • The function is listed in Table 68 of the OpenCL C Specification v3.0.19.
@llvmbot
Copy link
Member

llvmbot commented Aug 22, 2025

@llvm/pr-subscribers-offload

Author: Leandro Lacerda (leandrolcampos)

Changes

This patch adds a set of randomized conformance tests for double-precision math functions.

The functions included in this set were selected based on the following criteria:

  • An implementation exists in libc/src/math/generic (i.e., it is not just a wrapper around a compiler built-in).
  • The corresponding LLVM CPU libm implementation is correctly rounded.
  • The function is listed in Table 68 of the OpenCL C Specification v3.0.19.

Patch is 62.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155003.diff

20 Files Affected:

  • (modified) offload/unittests/Conformance/device_code/CUDAMath.cpp (+93-1)
  • (modified) offload/unittests/Conformance/device_code/DeviceAPIs.hpp (+30)
  • (modified) offload/unittests/Conformance/device_code/HIPMath.cpp (+93-1)
  • (modified) offload/unittests/Conformance/device_code/LLVMLibm.cpp (+94-2)
  • (added) offload/unittests/Conformance/tests/AcosTest.cpp (+63)
  • (added) offload/unittests/Conformance/tests/AsinTest.cpp (+63)
  • (modified) offload/unittests/Conformance/tests/CMakeLists.txt (+15)
  • (added) offload/unittests/Conformance/tests/CbrtTest.cpp (+63)
  • (added) offload/unittests/Conformance/tests/CosTest.cpp (+63)
  • (added) offload/unittests/Conformance/tests/Exp10Test.cpp (+64)
  • (added) offload/unittests/Conformance/tests/Exp2Test.cpp (+63)
  • (added) offload/unittests/Conformance/tests/ExpTest.cpp (+63)
  • (added) offload/unittests/Conformance/tests/Expm1Test.cpp (+64)
  • (added) offload/unittests/Conformance/tests/HypotTest.cpp (+65)
  • (added) offload/unittests/Conformance/tests/Log10Test.cpp (+67)
  • (added) offload/unittests/Conformance/tests/Log1pTest.cpp (+67)
  • (added) offload/unittests/Conformance/tests/Log2Test.cpp (+66)
  • (added) offload/unittests/Conformance/tests/SinTest.cpp (+63)
  • (added) offload/unittests/Conformance/tests/SincosTest.cpp (+80)
  • (added) offload/unittests/Conformance/tests/TanTest.cpp (+63)
diff --git a/offload/unittests/Conformance/device_code/CUDAMath.cpp b/offload/unittests/Conformance/device_code/CUDAMath.cpp index d47607a7c862e..d80660b2e3c74 100644 --- a/offload/unittests/Conformance/device_code/CUDAMath.cpp +++ b/offload/unittests/Conformance/device_code/CUDAMath.cpp @@ -30,6 +30,18 @@ static inline float powfRoundedExponent(float Base, float Exponent) { return __nv_powf(Base, __nv_roundf(Exponent)); } +static inline double sincosSin(double X) { + double SinX, CosX; + __nv_sincos(X, &SinX, &CosX); + return SinX; +} + +static inline double sincosCos(double X) { + double SinX, CosX; + __nv_sincos(X, &SinX, &CosX); + return CosX; +} + static inline float sincosfSin(float X) { float SinX, CosX; __nv_sincosf(X, &SinX, &CosX); @@ -48,6 +60,11 @@ static inline float sincosfCos(float X) { extern "C" { +__gpu_kernel void acosKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_acos>(NumElements, Out, X); +} + __gpu_kernel void acosfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_acosf>(NumElements, Out, X); @@ -58,6 +75,11 @@ __gpu_kernel void acoshfKernel(const float *X, float *Out, runKernelBody<__nv_acoshf>(NumElements, Out, X); } +__gpu_kernel void asinKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_asin>(NumElements, Out, X); +} + __gpu_kernel void asinfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_asinf>(NumElements, Out, X); @@ -83,11 +105,21 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out, runKernelBody<__nv_atanhf>(NumElements, Out, X); } +__gpu_kernel void cbrtKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_cbrt>(NumElements, Out, X); +} + __gpu_kernel void cbrtfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_cbrtf>(NumElements, Out, X); } +__gpu_kernel void cosKernel(const double *X, double *Out, 
+ size_t NumElements) noexcept { + runKernelBody<__nv_cos>(NumElements, Out, X); +} + __gpu_kernel void cosfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_cosf>(NumElements, Out, X); @@ -108,27 +140,52 @@ __gpu_kernel void erffKernel(const float *X, float *Out, runKernelBody<__nv_erff>(NumElements, Out, X); } +__gpu_kernel void expKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_exp>(NumElements, Out, X); +} + __gpu_kernel void expfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_expf>(NumElements, Out, X); } +__gpu_kernel void exp10Kernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_exp10>(NumElements, Out, X); +} + __gpu_kernel void exp10fKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_exp10f>(NumElements, Out, X); } +__gpu_kernel void exp2Kernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_exp2>(NumElements, Out, X); +} + __gpu_kernel void exp2fKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_exp2f>(NumElements, Out, X); } +__gpu_kernel void expm1Kernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_expm1>(NumElements, Out, X); +} + __gpu_kernel void expm1fKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_expm1f>(NumElements, Out, X); } -__gpu_kernel void hypotfKernel(const float *X, float *Y, float *Out, +__gpu_kernel void hypotKernel(const double *X, const double *Y, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_hypot>(NumElements, Out, X, Y); +} + +__gpu_kernel void hypotfKernel(const float *X, const float *Y, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_hypotf>(NumElements, Out, X, Y); } @@ -143,16 +200,31 @@ __gpu_kernel void logfKernel(const float *X, float *Out, 
runKernelBody<__nv_logf>(NumElements, Out, X); } +__gpu_kernel void log10Kernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_log10>(NumElements, Out, X); +} + __gpu_kernel void log10fKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_log10f>(NumElements, Out, X); } +__gpu_kernel void log1pKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_log1p>(NumElements, Out, X); +} + __gpu_kernel void log1pfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_log1pf>(NumElements, Out, X); } +__gpu_kernel void log2Kernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_log2>(NumElements, Out, X); +} + __gpu_kernel void log2fKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_log2f>(NumElements, Out, X); @@ -169,11 +241,26 @@ __gpu_kernel void powfRoundedExponentKernel(const float *X, float *Y, runKernelBody<powfRoundedExponent>(NumElements, Out, X, Y); } +__gpu_kernel void sinKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_sin>(NumElements, Out, X); +} + __gpu_kernel void sinfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_sinf>(NumElements, Out, X); } +__gpu_kernel void sincosSinKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<sincosSin>(NumElements, Out, X); +} + +__gpu_kernel void sincosCosKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<sincosCos>(NumElements, Out, X); +} + __gpu_kernel void sincosfSinKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<sincosfSin>(NumElements, Out, X); @@ -194,6 +281,11 @@ __gpu_kernel void sinpifKernel(const float *X, float *Out, runKernelBody<__nv_sinpif>(NumElements, Out, X); } +__gpu_kernel void tanKernel(const double *X, double *Out, + size_t 
NumElements) noexcept { + runKernelBody<__nv_tan>(NumElements, Out, X); +} + __gpu_kernel void tanfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_tanf>(NumElements, Out, X); diff --git a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp index 6504fff125640..32f21991d9ec3 100644 --- a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp +++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp @@ -48,34 +48,49 @@ extern const inline uint32_t __oclc_ISA_version = 9000; extern "C" { +double __nv_acos(double); float __nv_acosf(float); float __nv_acoshf(float); +double __nv_asin(double); float __nv_asinf(float); float __nv_asinhf(float); float __nv_atanf(float); float __nv_atan2f(float, float); float __nv_atanhf(float); +double __nv_cbrt(double); float __nv_cbrtf(float); +double __nv_cos(double); float __nv_cosf(float); float __nv_coshf(float); float __nv_cospif(float); float __nv_erff(float); +double __nv_exp(double); float __nv_expf(float); +double __nv_exp10(double); float __nv_exp10f(float); +double __nv_exp2(double); float __nv_exp2f(float); +double __nv_expm1(double); float __nv_expm1f(float); +double __nv_hypot(double, double); float __nv_hypotf(float, float); double __nv_log(double); float __nv_logf(float); +double __nv_log10(double); float __nv_log10f(float); +double __nv_log1p(double); float __nv_log1pf(float); +double __nv_log2(double); float __nv_log2f(float); float __nv_powf(float, float); float __nv_roundf(float); +double __nv_sin(double); float __nv_sinf(float); +void __nv_sincos(double, double *, double *); void __nv_sincosf(float, float *, float *); float __nv_sinhf(float); float __nv_sinpif(float); +double __nv_tan(double); float __nv_tanf(float); float __nv_tanhf(float); } // extern "C" @@ -86,34 +101,49 @@ float __nv_tanhf(float); extern "C" { +double __ocml_acos_f64(double); float __ocml_acos_f32(float); float __ocml_acosh_f32(float); 
+double __ocml_asin_f64(double); float __ocml_asin_f32(float); float __ocml_asinh_f32(float); float __ocml_atan_f32(float); float __ocml_atan2_f32(float, float); float __ocml_atanh_f32(float); +double __ocml_cbrt_f64(double); float __ocml_cbrt_f32(float); +double __ocml_cos_f64(double); float __ocml_cos_f32(float); float __ocml_cosh_f32(float); float __ocml_cospi_f32(float); float __ocml_erf_f32(float); +double __ocml_exp_f64(double); float __ocml_exp_f32(float); +double __ocml_exp10_f64(double); float __ocml_exp10_f32(float); +double __ocml_exp2_f64(double); float __ocml_exp2_f32(float); +double __ocml_expm1_f64(double); float __ocml_expm1_f32(float); +double __ocml_hypot_f64(double, double); float __ocml_hypot_f32(float, float); double __ocml_log_f64(double); float __ocml_log_f32(float); +double __ocml_log10_f64(double); float __ocml_log10_f32(float); +double __ocml_log1p_f64(double); float __ocml_log1p_f32(float); +double __ocml_log2_f64(double); float __ocml_log2_f32(float); float __ocml_pow_f32(float, float); float __ocml_round_f32(float); +double __ocml_sin_f64(double); float __ocml_sin_f32(float); +double __ocml_sincos_f64(double, double *); float __ocml_sincos_f32(float, float *); float __ocml_sinh_f32(float); float __ocml_sinpi_f32(float); +double __ocml_tan_f64(double); float __ocml_tan_f32(float); float __ocml_tanh_f32(float); } // extern "C" diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp index 74a7f5c3a9492..71dea4c8d2656 100644 --- a/offload/unittests/Conformance/device_code/HIPMath.cpp +++ b/offload/unittests/Conformance/device_code/HIPMath.cpp @@ -30,6 +30,18 @@ static inline float powfRoundedExponent(float Base, float Exponent) { return __ocml_pow_f32(Base, __ocml_round_f32(Exponent)); } +static inline double sincosSin(double X) { + double CosX; + double SinX = __ocml_sincos_f64(X, &CosX); + return SinX; +} + +static inline double sincosCos(double X) { + double CosX; + 
double SinX = __ocml_sincos_f64(X, &CosX); + return CosX; +} + static inline float sincosfSin(float X) { float CosX; float SinX = __ocml_sincos_f32(X, &CosX); @@ -48,6 +60,11 @@ static inline float sincosfCos(float X) { extern "C" { +__gpu_kernel void acosKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_acos_f64>(NumElements, Out, X); +} + __gpu_kernel void acosfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_acos_f32>(NumElements, Out, X); @@ -58,6 +75,11 @@ __gpu_kernel void acoshfKernel(const float *X, float *Out, runKernelBody<__ocml_acosh_f32>(NumElements, Out, X); } +__gpu_kernel void asinKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_asin_f64>(NumElements, Out, X); +} + __gpu_kernel void asinfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_asin_f32>(NumElements, Out, X); @@ -83,11 +105,21 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out, runKernelBody<__ocml_atanh_f32>(NumElements, Out, X); } +__gpu_kernel void cbrtKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_cbrt_f64>(NumElements, Out, X); +} + __gpu_kernel void cbrtfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_cbrt_f32>(NumElements, Out, X); } +__gpu_kernel void cosKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_cos_f64>(NumElements, Out, X); +} + __gpu_kernel void cosfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_cos_f32>(NumElements, Out, X); @@ -108,27 +140,52 @@ __gpu_kernel void erffKernel(const float *X, float *Out, runKernelBody<__ocml_erf_f32>(NumElements, Out, X); } +__gpu_kernel void expKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_exp_f64>(NumElements, Out, X); +} + __gpu_kernel void expfKernel(const 
float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_exp_f32>(NumElements, Out, X); } +__gpu_kernel void exp10Kernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_exp10_f64>(NumElements, Out, X); +} + __gpu_kernel void exp10fKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_exp10_f32>(NumElements, Out, X); } +__gpu_kernel void exp2Kernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_exp2_f64>(NumElements, Out, X); +} + __gpu_kernel void exp2fKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_exp2_f32>(NumElements, Out, X); } +__gpu_kernel void expm1Kernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_expm1_f64>(NumElements, Out, X); +} + __gpu_kernel void expm1fKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_expm1_f32>(NumElements, Out, X); } -__gpu_kernel void hypotfKernel(const float *X, float *Y, float *Out, +__gpu_kernel void hypotKernel(const double *X, const double *Y, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_hypot_f64>(NumElements, Out, X, Y); +} + +__gpu_kernel void hypotfKernel(const float *X, const float *Y, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_hypot_f32>(NumElements, Out, X, Y); } @@ -143,16 +200,31 @@ __gpu_kernel void logfKernel(const float *X, float *Out, runKernelBody<__ocml_log_f32>(NumElements, Out, X); } +__gpu_kernel void log10Kernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_log10_f64>(NumElements, Out, X); +} + __gpu_kernel void log10fKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_log10_f32>(NumElements, Out, X); } +__gpu_kernel void log1pKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_log1p_f64>(NumElements, Out, 
X); +} + __gpu_kernel void log1pfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_log1p_f32>(NumElements, Out, X); } +__gpu_kernel void log2Kernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_log2_f64>(NumElements, Out, X); +} + __gpu_kernel void log2fKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_log2_f32>(NumElements, Out, X); @@ -169,11 +241,26 @@ __gpu_kernel void powfRoundedExponentKernel(const float *X, float *Y, runKernelBody<powfRoundedExponent>(NumElements, Out, X, Y); } +__gpu_kernel void sinKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_sin_f64>(NumElements, Out, X); +} + __gpu_kernel void sinfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_sin_f32>(NumElements, Out, X); } +__gpu_kernel void sincosSinKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<sincosSin>(NumElements, Out, X); +} + +__gpu_kernel void sincosCosKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<sincosCos>(NumElements, Out, X); +} + __gpu_kernel void sincosfSinKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<sincosfSin>(NumElements, Out, X); @@ -194,6 +281,11 @@ __gpu_kernel void sinpifKernel(const float *X, float *Out, runKernelBody<__ocml_sinpi_f32>(NumElements, Out, X); } +__gpu_kernel void tanKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_tan_f64>(NumElements, Out, X); +} + __gpu_kernel void tanfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_tan_f32>(NumElements, Out, X); diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp index 20ad796c6d172..e25f8e1c6c042 100644 --- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp 
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp @@ -29,6 +29,18 @@ static inline float powfRoundedExponent(float Base, float Exponent) { return powf(Base, roundf(Exponent)); } +static inline double sincosSin(double X) { + double SinX, CosX; + sincos(X, &SinX, &CosX); + return SinX; +} + +static inline double sincosCos(double X) { + double SinX, CosX; + sincos(X, &SinX, &CosX); + return CosX; +} + static inline float sincosfSin(float X) { float SinX, CosX; sincosf(X, &SinX, &CosX); @@ -47,6 +59,11 @@ static inline float sincosfCos(float X) { extern "C" { +__gpu_kernel void acosKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<acos>(NumElements, Out, X); +} + __gpu_kernel void acosfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<acosf>(NumElements, Out, X); @@ -57,6 +74,11 @@ __gpu_kernel void acoshfKernel(const float *X, float *Out, runKernelBody<acoshf>(NumElements, Out, X); } +__gpu_kernel void asinKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<asin>(NumElements, Out, X); +} + __gpu_kernel void asinfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<asinf>(NumElements, Out, X); @@ -82,11 +104,21 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out, runKernelBody<atanhf>(NumElements, Out, X); } +__gpu_kernel void cbrtKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<cbrt>(NumElements, Out, X); +} + __gpu_kernel void cbrtfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<cbrtf>(NumElements, Out, X); } +__gpu_kernel void cosKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<cos>(NumElements, Out, X); +} + __gpu_kernel void cosfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<cosf>(NumElements, Out, X); @@ -107,32 +139,57 @@ __gpu_kernel void erffKernel(const float *... [truncated] 
@github-actions
Copy link

github-actions bot commented Aug 22, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@leandrolcampos
Copy link
Contributor Author

leandrolcampos commented Aug 22, 2025

Randomized Test Results for Double-Precision Math Functions

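The last four columns report the Max ULP Distance observed for each provider (platform).

| Function | ULP Tolerance | llvm-libm (AMDGPU) | llvm-libm (CUDA) | cuda-math (CUDA) | hip-math (AMDGPU) |
|---|---|---|---|---|---|
| acos | 4 | 5 (FAILED) | 5 (FAILED) | 1 | 1 |
| asin | 4 | 6 (FAILED) | 6 (FAILED) | 2 | 1 |
| cbrt | 2 | 0 | 0 | 1 | 1 |
| cos | 4 | 1 | 1 | 2 | 1 |
| exp | 3 | 1 | 1 | 1 | 1 |
| exp10 | 3 | 1 | 1 | 1 | 1 |
| exp2 | 3 | 1 | 1 | 1 | 1 |
| expm1 | 3 | 0 | 0 | 1 | 2 |
| hypot | 4 | 0 | 0 | 2 | 1 |
| log | 3 | 1 | 1 | 1 | 1 |
| log10 | 3 | 1 | 1 | 1 | 1 |
| log1p | 2 | 1 | 1 | 1 | 1 |
| log2 | 3 | 1 | 1 | 1 | 1 |
| sin | 4 | 1 | 1 | 1 | 1 |
| sincos (cos part) | 4 | 1 | 1 | 2 | 1 |
| sincos (sin part) | 4 | 1 | 1 | 1 | 1 |
| tan | 5 | 2 | 2 | 2 | 1 |
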
  • ULP (Units in the Last Place) tolerances are based on The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4, Table 68, Khronos Registry [July 10, 2025].
  • The AMD GPU used for testing is the gfx1030.
  • The NVIDIA GPU used for testing is the NVIDIA GeForce RTX 4070 Laptop GPU.
@jhuber6 jhuber6 merged commit 9919301 into llvm:main Aug 22, 2025
9 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

4 participants