Skip to content

Conversation

@yxsamliu
Copy link
Collaborator

@yxsamliu yxsamliu commented Oct 26, 2023

Added option -foffload-implicit-host-device-templates which is off by default.

When the option is on, template functions and specializations without
host/device attributes have implicit host device attributes.

They can be overridden by device template functions with the same signature.
They are emitted on device side only if they are used on device side.

This feature is added as an extension.
__has_extension(cuda_implicit_host_device_templates) can be used to
check whether it is enabled.

This is to facilitate using standard C++ headers for device.

Fixes: #69956

Fixes: SWDEV-428314

@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Oct 26, 2023
@yxsamliu yxsamliu requested review from Artem-B and jhuber6 October 26, 2023 19:12
@llvmbot
Copy link
Member

llvmbot commented Oct 26, 2023

@llvm/pr-subscribers-clang-driver
@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Yaxun (Sam) Liu (yxsamliu)

Changes

Currently std::is_invocable does not work for CUDA/HIP since its implementation requires checking whether a function is invocable in the context of a synthesized host function.

In general, to make <type_traits> work with CUDA/HIP, the template functions need to be defined as `__host__ __device__`
so that they are available in both host and device contexts.

Fixes: #69956

Fixes: SWDEV-428314


Full diff: https://github.com/llvm/llvm-project/pull/70369.diff

5 Files Affected:

  • (modified) clang/lib/Headers/CMakeLists.txt (+14-2)
  • (added) clang/lib/Headers/cuda_wrappers/__utility/swap.h (+3)
  • (added) clang/lib/Headers/cuda_wrappers/bits/move.h (+3)
  • (modified) clang/lib/Headers/cuda_wrappers/cmath (+6)
  • (added) clang/lib/Headers/cuda_wrappers/type_traits (+31)
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 02a0c81644b6c6d..64908dcd9b2b9c4 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -283,12 +283,18 @@ set(cuda_wrapper_files cuda_wrappers/cmath cuda_wrappers/complex cuda_wrappers/new + cuda_wrappers/type_traits ) set(cuda_wrapper_bits_files cuda_wrappers/bits/shared_ptr_base.h cuda_wrappers/bits/basic_string.h cuda_wrappers/bits/basic_string.tcc + cuda_wrappers/bits/move.h +) + +set(cuda_wrapper_utility_files + cuda_wrappers/__utility/swap.h ) set(ppc_wrapper_files @@ -363,7 +369,7 @@ endfunction(clang_generate_header) # Copy header files from the source directory to the build directory foreach( f ${files} ${cuda_wrapper_files} ${cuda_wrapper_bits_files} ${ppc_wrapper_files} ${openmp_wrapper_files} ${hlsl_files} - ${llvm_libc_wrapper_files}) + ${llvm_libc_wrapper_files} ${cuda_wrapper_utility_files}) copy_header_to_output_dir(${CMAKE_CURRENT_SOURCE_DIR} ${f}) endforeach( f ) @@ -468,7 +474,7 @@ add_header_target("arm-common-resource-headers" "${arm_common_files};${arm_commo # Architecture/platform specific targets add_header_target("arm-resource-headers" "${arm_only_files};${arm_only_generated_files}") add_header_target("aarch64-resource-headers" "${aarch64_only_files};${aarch64_only_generated_files}") -add_header_target("cuda-resource-headers" "${cuda_files};${cuda_wrapper_files};${cuda_wrapper_bits_files}") +add_header_target("cuda-resource-headers" "${cuda_files};${cuda_wrapper_files};${cuda_wrapper_bits_files};${cuda_wrapper_utility_files}") add_header_target("hexagon-resource-headers" "${hexagon_files}") add_header_target("hip-resource-headers" "${hip_files}") add_header_target("loongarch-resource-headers" "${loongarch_files}") @@ -561,6 +567,12 @@ install( EXCLUDE_FROM_ALL COMPONENT cuda-resource-headers) +install( + FILES ${cuda_wrapper_utility_files} + DESTINATION ${header_install_dir}/cuda_wrappers/__utility + EXCLUDE_FROM_ALL + 
COMPONENT cuda-resource-headers) + install( FILES ${cuda_files} DESTINATION ${header_install_dir} diff --git a/clang/lib/Headers/cuda_wrappers/__utility/swap.h b/clang/lib/Headers/cuda_wrappers/__utility/swap.h new file mode 100644 index 000000000000000..128dc56ffc55755 --- /dev/null +++ b/clang/lib/Headers/cuda_wrappers/__utility/swap.h @@ -0,0 +1,3 @@ +#pragma clang force_cuda_host_device begin +#include_next "__utility/swap.h" +#pragma clang force_cuda_host_device end diff --git a/clang/lib/Headers/cuda_wrappers/bits/move.h b/clang/lib/Headers/cuda_wrappers/bits/move.h new file mode 100644 index 000000000000000..23580e36d094a16 --- /dev/null +++ b/clang/lib/Headers/cuda_wrappers/bits/move.h @@ -0,0 +1,3 @@ +#pragma clang force_cuda_host_device begin +#include_next "bits/move.h" +#pragma clang force_cuda_host_device end diff --git a/clang/lib/Headers/cuda_wrappers/cmath b/clang/lib/Headers/cuda_wrappers/cmath index 45f89beec9b4df4..512a422b977972f 100644 --- a/clang/lib/Headers/cuda_wrappers/cmath +++ b/clang/lib/Headers/cuda_wrappers/cmath @@ -27,6 +27,12 @@ #include_next <cmath> #if defined(_LIBCPP_STD_VER) +#if !defined(_LIBCPP_CONSTEXPR_SINCE_CXX14) +#define _LIBCPP_CONSTEXPR_SINCE_CXX14 +#endif +#if !defined(_LIBCPP_CONSTEXPR_SINCE_CXX20) +#define _LIBCPP_CONSTEXPR_SINCE_CXX20 +#endif // libc++ will need long double variants of these functions, but CUDA does not // provide them. 
We'll provide their declarations, which should allow the diff --git a/clang/lib/Headers/cuda_wrappers/type_traits b/clang/lib/Headers/cuda_wrappers/type_traits new file mode 100644 index 000000000000000..a4e178dc2d34afc --- /dev/null +++ b/clang/lib/Headers/cuda_wrappers/type_traits @@ -0,0 +1,31 @@ +/*===---- type_traits - CUDA wrapper for <type_traits> ---------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_CUDA_WRAPPERS_TYPE_TRAITS +#define __CLANG_CUDA_WRAPPERS_TYPE_TRAITS + +#pragma clang force_cuda_host_device begin +#include_next <type_traits> +#pragma clang force_cuda_host_device end + +#endif // __CLANG_CUDA_WRAPPERS_TYPE_TRAITS 
Copy link
Contributor

@jhuber6 jhuber6 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks reasonable, the same approach we've always done for these things.

@llvmbot llvmbot added clang:driver 'clang' and 'clang++' user-facing binaries. Not 'clang-cl' clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:codegen IR generation bugs: mangling, exceptions, etc. labels Oct 31, 2023
@yxsamliu yxsamliu changed the title [CUDA][HIP] Fix std::is_invocable [CUDA][HIP] Make template implicitly host device Oct 31, 2023
@yxsamliu yxsamliu force-pushed the fix-invocable2 branch 2 times, most recently from f68b605 to 0c44a09 Compare November 1, 2023 02:35
@github-actions
Copy link

github-actions bot commented Nov 1, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

Added option -foffload-implicit-host-device-templates which is off by default. When the option is on, template functions and specializations without host/device attributes have implicit host device attributes. They can be overridden by device template functions with the same signature. They are emitted on device side only if they are used on device side. This feature is added as an extension. `__has_extension(cuda_implicit_host_device_templates)` can be used to check whether it is enabled. This is to facilitate using standard C++ headers for device. Fixes: llvm#69956 Fixes: SWDEV-428314
@yxsamliu
Copy link
Collaborator Author

yxsamliu commented Nov 9, 2023

ping

This patch passes our internal CI.

@Artem-B
Copy link
Member

Artem-B commented Nov 9, 2023

Now that we're making an even larger class of functions implicitly HD, the last logical step would be to make all unattributed functions implicitly HD, too (in a separate patch). After all, a template is as GPU-portable (or not) as a regular function. Unlike constexpr or compiler-generated glue for lambdas, template functions do not confer any benefits to our assumptions about whether the code will be compileable and working on a GPU.

@yxsamliu yxsamliu merged commit 9774d0c into llvm:main Nov 10, 2023
zahiraam pushed a commit to zahiraam/llvm-project that referenced this pull request Nov 20, 2023
Added option -foffload-implicit-host-device-templates which is off by default. When the option is on, template functions and specializations without host/device attributes have implicit host device attributes. They can be overridden by device template functions with the same signature. They are emitted on device side only if they are used on device side. This feature is added as an extension. `__has_extension(cuda_implicit_host_device_templates)` can be used to check whether it is enabled. This is to facilitate using standard C++ headers for device. Fixes: llvm#69956 Fixes: SWDEV-428314
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:X86 clang:codegen IR generation bugs: mangling, exceptions, etc. clang:driver 'clang' and 'clang++' user-facing binaries. Not 'clang-cl' clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category

4 participants