diff --git a/libc/cmake/modules/LLVMLibCLibraryRules.cmake b/libc/cmake/modules/LLVMLibCLibraryRules.cmake index 75bc81e2aee8e..e677b4cd2c28f 100644 --- a/libc/cmake/modules/LLVMLibCLibraryRules.cmake +++ b/libc/cmake/modules/LLVMLibCLibraryRules.cmake @@ -83,97 +83,6 @@ function(get_all_object_file_deps result fq_deps_list) set(${result} ${all_deps} PARENT_SCOPE) endfunction() -# A rule to build a library from a collection of entrypoint objects and bundle -# it into a GPU fatbinary. Usage is the same as 'add_entrypoint_library'. -# Usage: -# add_gpu_entrypoint_library( -# DEPENDS -# ) -function(add_gpu_entrypoint_library target_name base_target_name) - cmake_parse_arguments( - "ENTRYPOINT_LIBRARY" - "" # No optional arguments - "" # No single value arguments - "DEPENDS" # Multi-value arguments - ${ARGN} - ) - if(NOT ENTRYPOINT_LIBRARY_DEPENDS) - message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list " - "of 'add_entrypoint_object' targets.") - endif() - - get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS}) - get_all_object_file_deps(all_deps "${fq_deps_list}") - - # The GPU 'libc' needs to be exported in a format that can be linked with - # offloading langauges like OpenMP or CUDA. This wraps every GPU object into a - # fat binary and adds them to a static library. - set(objects "") - foreach(dep IN LISTS all_deps) - set(object $<$,${dep}>:$>) - string(FIND ${dep} "." last_dot_loc REVERSE) - math(EXPR name_loc "${last_dot_loc} + 1") - string(SUBSTRING ${dep} ${name_loc} -1 name) - if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) - set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63) - elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) - set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa) - endif() - - # Use the 'clang-offload-packager' to merge these files into a binary blob. 
- add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin" - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary - COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER} - "${prefix},file=$" -o - ${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin - DEPENDS ${dep} ${base_target_name} - COMMENT "Packaging LLVM offloading binary for '${object}'" - ) - add_custom_target(${dep}.__gpubin__ DEPENDS ${dep} - "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin") - if(TARGET clang-offload-packager) - add_dependencies(${dep}.__gpubin__ clang-offload-packager) - endif() - - # CMake does not permit setting the name on object files. In order to have - # human readable names we create an empty stub file with the entrypoint - # name. This empty file will then have the created binary blob embedded. - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp" - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs - COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp - DEPENDS ${dep} ${dep}.__gpubin__ ${base_target_name} - ) - add_custom_target(${dep}.__stub__ - DEPENDS ${dep}.__gpubin__ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp") - - add_library(${dep}.__fatbin__ - EXCLUDE_FROM_ALL OBJECT - "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp" - ) - - # This is always compiled for the LLVM host triple instead of the native GPU - # triple that is used by default in the build. - target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib) - target_compile_options(${dep}.__fatbin__ PRIVATE - --target=${LLVM_HOST_TRIPLE} - "SHELL:-Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin") - add_dependencies(${dep}.__fatbin__ - ${dep} ${dep}.__stub__ ${dep}.__gpubin__ ${base_target_name}) - - # Set the list of newly create fat binaries containing embedded device code. 
- list(APPEND objects $) - endforeach() - - add_library( - ${target_name} - STATIC - ${objects} - ) - set_target_properties(${target_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR}) -endfunction(add_gpu_entrypoint_library) - # A rule to build a library from a collection of entrypoint objects and bundle # it in a single LLVM-IR bitcode file. # Usage: diff --git a/libc/docs/gpu/building.rst b/libc/docs/gpu/building.rst index d3e64c6d42431..60498e348395a 100644 --- a/libc/docs/gpu/building.rst +++ b/libc/docs/gpu/building.rst @@ -151,25 +151,6 @@ Build overview Once installed, the GPU build will create several files used for different targets. This section will briefly describe their purpose. -**lib//libcgpu-amdgpu.a or lib/libcgpu-amdgpu.a** - A static library containing fat binaries supporting AMD GPUs. These are built - using the support described in the `clang documentation - `_. These are intended to - be static libraries included natively for offloading languages like CUDA, HIP, - or OpenMP. This implements the standard C library. - -**lib//libmgpu-amdgpu.a or lib/libmgpu-amdgpu.a** - A static library containing fat binaries that implements the standard math - library for AMD GPUs. - -**lib//libcgpu-nvptx.a or lib/libcgpu-nvptx.a** - A static library containing fat binaries that implement the standard C library - for NVIDIA GPUs. - -**lib//libmgpu-nvptx.a or lib/libmgpu-nvptx.a** - A static library containing fat binaries that implement the standard math - library for NVIDIA GPUs. - **include/** The include directory where all of the generated headers for the target will go. These definitions are strictly for the GPU when being targeted directly. 
diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst index d5ad4c7a0368d..4034c04867c99 100644 --- a/libc/docs/gpu/using.rst +++ b/libc/docs/gpu/using.rst @@ -34,16 +34,17 @@ described in the `clang documentation by the OpenMP toolchain, but is currently opt-in for the CUDA and HIP toolchains through the ``--offload-new-driver``` and ``-fgpu-rdc`` flags. -The installation should contain a static library called ``libcgpu-amdgpu.a`` or -``libcgpu-nvptx.a`` depending on which GPU architectures your build targeted. -These contain fat binaries compatible with the offloading toolchain such that -they can be used directly. +In order to link the GPU runtime, we simply pass this library to the embedded +device linker job. This can be done using the ``-Xoffload-linker`` option, which +forwards an argument to a ``clang`` job used to create the final GPU executable. +The toolchain should pick up the C libraries automatically in most cases, so +this shouldn't be necessary. .. code-block:: sh - $> clang openmp.c -fopenmp --offload-arch=gfx90a -lcgpu-amdgpu - $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -lcgpu-nvptx - $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -lcgpu-amdgpu + $> clang openmp.c -fopenmp --offload-arch=gfx90a -Xoffload-linker -lc + $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc + $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc This will automatically link in the needed function definitions if they were required by the user's application. Normally using the ``-fgpu-rdc`` option diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt index 37acf3950b460..4b7cfc4b76e2e 100644 --- a/libc/lib/CMakeLists.txt +++ b/libc/lib/CMakeLists.txt @@ -40,20 +40,6 @@ foreach(archive IN ZIP_LISTS # Add the offloading version of the library for offloading languages. 
These # are installed in the standard search path separate from the other libraries. if(LIBC_TARGET_OS_IS_GPU) - add_gpu_entrypoint_library( - ${archive_1}gpu - ${archive_1} - DEPENDS - ${${archive_2}} - ) - set_target_properties( - ${archive_1}gpu - PROPERTIES - ARCHIVE_OUTPUT_NAME ${archive_0}gpu-${LIBC_TARGET_ARCHITECTURE} - ARCHIVE_OUTPUT_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR} - ) - list(APPEND added_gpu_archive_targets ${archive_1}gpu) - add_bitcode_entrypoint_library( ${archive_1}bitcode ${archive_1} @@ -65,7 +51,6 @@ foreach(archive IN ZIP_LISTS PROPERTIES OUTPUT_NAME ${archive_1}.bc ) - add_dependencies(${archive_1}gpu ${archive_1}bitcode) list(APPEND added_gpu_bitcode_targets ${archive_1}bitcode) endif() endforeach()