[libc] Remove 'packaged' GPU build support (llvm#100208)

jhuber6 · web-flow · commit c1c6ed83e9ac · 2024-07-24T07:22:49.000-05:00
Summary:
Previously, the GPU built the `libc` in a fat binary version that was
used to pass this to the link job in offloading languages like CUDA or
OpenMP. This was mostly required because NVIDIA couldn't consume the
standard static library version. Recent patches have now created the
`clang-nvlink-wrapper` which lets us do that. Now, the C library is just
included implicitly by the toolchain (or passed with -Xoffload-linker
-lc).

This code can be fully removed, which will heavily simplify the build
(and removed some bugs and garbage files I've encoutnered).
diff --git a/libc/cmake/modules/LLVMLibCLibraryRules.cmake b/libc/cmake/modules/LLVMLibCLibraryRules.cmake
@@ -83,97 +83,6 @@ function(get_all_object_file_deps result fq_deps_list)
   set(${result} ${all_deps} PARENT_SCOPE)
 endfunction()
 
-# A rule to build a library from a collection of entrypoint objects and bundle
-# it into a GPU fatbinary. Usage is the same as 'add_entrypoint_library'.
-# Usage:
-#     add_gpu_entrypoint_library(
-#       DEPENDS <list of add_entrypoint_object targets>
-#     )
-function(add_gpu_entrypoint_library target_name base_target_name)
-  cmake_parse_arguments(
-    "ENTRYPOINT_LIBRARY"
-    "" # No optional arguments
-    "" # No single value arguments
-    "DEPENDS" # Multi-value arguments
-    ${ARGN}
-  )
-  if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
-    message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list "
-                        "of 'add_entrypoint_object' targets.")
-  endif()
-
-  get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
-  get_all_object_file_deps(all_deps "${fq_deps_list}")
-
-  # The GPU 'libc' needs to be exported in a format that can be linked with
-  # offloading langauges like OpenMP or CUDA. This wraps every GPU object into a
-  # fat binary and adds them to a static library.
-  set(objects "")
-  foreach(dep IN LISTS all_deps)
-    set(object $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>)
-    string(FIND ${dep} "." last_dot_loc REVERSE)
-    math(EXPR name_loc "${last_dot_loc} + 1")
-    string(SUBSTRING ${dep} ${name_loc} -1 name)
-    if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
-      set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63)
-    elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
-      set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa)
-    endif()
-
-    # Use the 'clang-offload-packager' to merge these files into a binary blob.
-    add_custom_command(
-      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin"
-      COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary
-      COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
-              "${prefix},file=$<JOIN:${object},,file=>" -o
-              ${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin
-      DEPENDS ${dep} ${base_target_name}
-      COMMENT "Packaging LLVM offloading binary for '${object}'"
-    )
-    add_custom_target(${dep}.__gpubin__ DEPENDS ${dep}
-                      "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
-    if(TARGET clang-offload-packager)
-      add_dependencies(${dep}.__gpubin__ clang-offload-packager)
-    endif()
-
-    # CMake does not permit setting the name on object files. In order to have
-    # human readable names we create an empty stub file with the entrypoint
-    # name. This empty file will then have the created binary blob embedded.
-    add_custom_command(
-      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp"
-      COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs
-      COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp
-      DEPENDS ${dep} ${dep}.__gpubin__ ${base_target_name}
-    )
-    add_custom_target(${dep}.__stub__
-                      DEPENDS ${dep}.__gpubin__ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp")
-
-    add_library(${dep}.__fatbin__
-      EXCLUDE_FROM_ALL OBJECT
-      "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp"
-    )
-
-    # This is always compiled for the LLVM host triple instead of the native GPU
-    # triple that is used by default in the build.
-    target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib)
-    target_compile_options(${dep}.__fatbin__ PRIVATE
-      --target=${LLVM_HOST_TRIPLE}
-      "SHELL:-Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
-    add_dependencies(${dep}.__fatbin__
-                     ${dep} ${dep}.__stub__ ${dep}.__gpubin__ ${base_target_name})
-
-    # Set the list of newly create fat binaries containing embedded device code.
-    list(APPEND objects $<TARGET_OBJECTS:${dep}.__fatbin__>)
-  endforeach()
-
-  add_library(
-    ${target_name}
-    STATIC
-      ${objects}
-  )
-  set_target_properties(${target_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR})
-endfunction(add_gpu_entrypoint_library)
-
 # A rule to build a library from a collection of entrypoint objects and bundle
 # it in a single LLVM-IR bitcode file.
 # Usage:
diff --git a/libc/docs/gpu/building.rst b/libc/docs/gpu/building.rst
@@ -151,25 +151,6 @@ Build overview
 Once installed, the GPU build will create several files used for different
 targets. This section will briefly describe their purpose.
 
-**lib/<host-triple>/libcgpu-amdgpu.a or lib/libcgpu-amdgpu.a**
-  A static library containing fat binaries supporting AMD GPUs. These are built
-  using the support described in the `clang documentation
-  <https://clang.llvm.org/docs/OffloadingDesign.html>`_. These are intended to
-  be static libraries included natively for offloading languages like CUDA, HIP,
-  or OpenMP. This implements the standard C library.
-
-**lib/<host-triple>/libmgpu-amdgpu.a or lib/libmgpu-amdgpu.a**
-  A static library containing fat binaries that implements the standard math
-  library for AMD GPUs.
-
-**lib/<host-triple>/libcgpu-nvptx.a or lib/libcgpu-nvptx.a**
-  A static library containing fat binaries that implement the standard C library
-  for NVIDIA GPUs.
-
-**lib/<host-triple>/libmgpu-nvptx.a or lib/libmgpu-nvptx.a**
-  A static library containing fat binaries that implement the standard math
-  library for NVIDIA GPUs.
-
 **include/<target-triple>**
   The include directory where all of the generated headers for the target will
   go. These definitions are strictly for the GPU when being targeted directly.
diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst
@@ -34,16 +34,17 @@ described in the `clang documentation
 by the OpenMP toolchain, but is currently opt-in for the CUDA and HIP toolchains
 through the ``--offload-new-driver``` and ``-fgpu-rdc`` flags.
 
-The installation should contain a static library called ``libcgpu-amdgpu.a`` or
-``libcgpu-nvptx.a`` depending on which GPU architectures your build targeted.
-These contain fat binaries compatible with the offloading toolchain such that
-they can be used directly.
+In order or link the GPU runtime, we simply pass this library to the embedded 
+device linker job. This can be done using the ``-Xoffload-linker`` option, which 
+forwards an argument to a ``clang`` job used to create the final GPU executable. 
+The toolchain should pick up the C libraries automatically in most cases, so 
+this shouldn't be necessary.
 
 .. code-block:: sh
 
-  $> clang openmp.c -fopenmp --offload-arch=gfx90a -lcgpu-amdgpu
-  $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -lcgpu-nvptx
-  $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -lcgpu-amdgpu
+  $> clang openmp.c -fopenmp --offload-arch=gfx90a -Xoffload-linker -lc
+  $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
+  $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
 
 This will automatically link in the needed function definitions if they were
 required by the user's application. Normally using the ``-fgpu-rdc`` option
diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt
@@ -40,20 +40,6 @@ foreach(archive IN ZIP_LISTS
   # Add the offloading version of the library for offloading languages. These
   # are installed in the standard search path separate from the other libraries.
   if(LIBC_TARGET_OS_IS_GPU)
-    add_gpu_entrypoint_library(
-      ${archive_1}gpu
-      ${archive_1}
-      DEPENDS
-        ${${archive_2}}
-    )
-    set_target_properties(
-      ${archive_1}gpu
-      PROPERTIES
-        ARCHIVE_OUTPUT_NAME ${archive_0}gpu-${LIBC_TARGET_ARCHITECTURE}
-        ARCHIVE_OUTPUT_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
-    )
-    list(APPEND added_gpu_archive_targets ${archive_1}gpu)
-
     add_bitcode_entrypoint_library(
       ${archive_1}bitcode
       ${archive_1}
@@ -65,7 +51,6 @@ foreach(archive IN ZIP_LISTS
       PROPERTIES
         OUTPUT_NAME ${archive_1}.bc
     )
-    add_dependencies(${archive_1}gpu ${archive_1}bitcode)
     list(APPEND added_gpu_bitcode_targets ${archive_1}bitcode)
   endif()
 endforeach()