Skip to content

Commit c1c6ed8

Browse files
authored
[libc] Remove 'packaged' GPU build support (llvm#100208)
Summary: Previously, the GPU build produced a fat binary version of `libc` that was passed to the link job in offloading languages like CUDA or OpenMP. This was mostly required because NVIDIA couldn't consume the standard static library version. Recent patches have now created the `clang-nvlink-wrapper` which lets us do that. Now, the C library is just included implicitly by the toolchain (or passed with -Xoffload-linker -lc). This code can be fully removed, which will heavily simplify the build (and remove some bugs and garbage files I've encountered).
1 parent 59eae91 commit c1c6ed8

File tree

4 files changed

+8
-132
lines changed

4 files changed

+8
-132
lines changed

libc/cmake/modules/LLVMLibCLibraryRules.cmake

-91
Original file line numberDiff line numberDiff line change
@@ -83,97 +83,6 @@ function(get_all_object_file_deps result fq_deps_list)
8383
set(${result} ${all_deps} PARENT_SCOPE)
8484
endfunction()
8585

86-
# A rule to build a library from a collection of entrypoint objects and bundle
87-
# it into a GPU fatbinary. Usage is the same as 'add_entrypoint_library'.
88-
# Usage:
89-
# add_gpu_entrypoint_library(
90-
# DEPENDS <list of add_entrypoint_object targets>
91-
# )
92-
function(add_gpu_entrypoint_library target_name base_target_name)
93-
cmake_parse_arguments(
94-
"ENTRYPOINT_LIBRARY"
95-
"" # No optional arguments
96-
"" # No single value arguments
97-
"DEPENDS" # Multi-value arguments
98-
${ARGN}
99-
)
100-
if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
101-
message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list "
102-
"of 'add_entrypoint_object' targets.")
103-
endif()
104-
105-
get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
106-
get_all_object_file_deps(all_deps "${fq_deps_list}")
107-
108-
# The GPU 'libc' needs to be exported in a format that can be linked with
109-
# offloading languages like OpenMP or CUDA. This wraps every GPU object into a
110-
# fat binary and adds them to a static library.
111-
set(objects "")
112-
foreach(dep IN LISTS all_deps)
113-
set(object $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>)
114-
string(FIND ${dep} "." last_dot_loc REVERSE)
115-
math(EXPR name_loc "${last_dot_loc} + 1")
116-
string(SUBSTRING ${dep} ${name_loc} -1 name)
117-
if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
118-
set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63)
119-
elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
120-
set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa)
121-
endif()
122-
123-
# Use the 'clang-offload-packager' to merge these files into a binary blob.
124-
add_custom_command(
125-
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin"
126-
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary
127-
COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
128-
"${prefix},file=$<JOIN:${object},,file=>" -o
129-
${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin
130-
DEPENDS ${dep} ${base_target_name}
131-
COMMENT "Packaging LLVM offloading binary for '${object}'"
132-
)
133-
add_custom_target(${dep}.__gpubin__ DEPENDS ${dep}
134-
"${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
135-
if(TARGET clang-offload-packager)
136-
add_dependencies(${dep}.__gpubin__ clang-offload-packager)
137-
endif()
138-
139-
# CMake does not permit setting the name on object files. In order to have
140-
# human readable names we create an empty stub file with the entrypoint
141-
# name. This empty file will then have the created binary blob embedded.
142-
add_custom_command(
143-
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp"
144-
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs
145-
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp
146-
DEPENDS ${dep} ${dep}.__gpubin__ ${base_target_name}
147-
)
148-
add_custom_target(${dep}.__stub__
149-
DEPENDS ${dep}.__gpubin__ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp")
150-
151-
add_library(${dep}.__fatbin__
152-
EXCLUDE_FROM_ALL OBJECT
153-
"${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp"
154-
)
155-
156-
# This is always compiled for the LLVM host triple instead of the native GPU
157-
# triple that is used by default in the build.
158-
target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib)
159-
target_compile_options(${dep}.__fatbin__ PRIVATE
160-
--target=${LLVM_HOST_TRIPLE}
161-
"SHELL:-Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
162-
add_dependencies(${dep}.__fatbin__
163-
${dep} ${dep}.__stub__ ${dep}.__gpubin__ ${base_target_name})
164-
165-
# Set the list of newly created fat binaries containing embedded device code.
166-
list(APPEND objects $<TARGET_OBJECTS:${dep}.__fatbin__>)
167-
endforeach()
168-
169-
add_library(
170-
${target_name}
171-
STATIC
172-
${objects}
173-
)
174-
set_target_properties(${target_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR})
175-
endfunction(add_gpu_entrypoint_library)
176-
17786
# A rule to build a library from a collection of entrypoint objects and bundle
17887
# it in a single LLVM-IR bitcode file.
17988
# Usage:

libc/docs/gpu/building.rst

-19
Original file line numberDiff line numberDiff line change
@@ -151,25 +151,6 @@ Build overview
151151
Once installed, the GPU build will create several files used for different
152152
targets. This section will briefly describe their purpose.
153153

154-
**lib/<host-triple>/libcgpu-amdgpu.a or lib/libcgpu-amdgpu.a**
155-
A static library containing fat binaries supporting AMD GPUs. These are built
156-
using the support described in the `clang documentation
157-
<https://clang.llvm.org/docs/OffloadingDesign.html>`_. These are intended to
158-
be static libraries included natively for offloading languages like CUDA, HIP,
159-
or OpenMP. This implements the standard C library.
160-
161-
**lib/<host-triple>/libmgpu-amdgpu.a or lib/libmgpu-amdgpu.a**
162-
A static library containing fat binaries that implements the standard math
163-
library for AMD GPUs.
164-
165-
**lib/<host-triple>/libcgpu-nvptx.a or lib/libcgpu-nvptx.a**
166-
A static library containing fat binaries that implement the standard C library
167-
for NVIDIA GPUs.
168-
169-
**lib/<host-triple>/libmgpu-nvptx.a or lib/libmgpu-nvptx.a**
170-
A static library containing fat binaries that implement the standard math
171-
library for NVIDIA GPUs.
172-
173154
**include/<target-triple>**
174155
The include directory where all of the generated headers for the target will
175156
go. These definitions are strictly for the GPU when being targeted directly.

libc/docs/gpu/using.rst

+8-7
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,17 @@ described in the `clang documentation
3434
by the OpenMP toolchain, but is currently opt-in for the CUDA and HIP toolchains
3535
through the ``--offload-new-driver`` and ``-fgpu-rdc`` flags.
3636

37-
The installation should contain a static library called ``libcgpu-amdgpu.a`` or
38-
``libcgpu-nvptx.a`` depending on which GPU architectures your build targeted.
39-
These contain fat binaries compatible with the offloading toolchain such that
40-
they can be used directly.
37+
In order to link the GPU runtime, we simply pass this library to the embedded
38+
device linker job. This can be done using the ``-Xoffload-linker`` option, which
39+
forwards an argument to a ``clang`` job used to create the final GPU executable.
40+
The toolchain should pick up the C libraries automatically in most cases, so
41+
this shouldn't be necessary.
4142

4243
.. code-block:: sh
4344
44-
$> clang openmp.c -fopenmp --offload-arch=gfx90a -lcgpu-amdgpu
45-
$> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -lcgpu-nvptx
46-
$> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -lcgpu-amdgpu
45+
$> clang openmp.c -fopenmp --offload-arch=gfx90a -Xoffload-linker -lc
46+
$> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
47+
$> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
4748
4849
This will automatically link in the needed function definitions if they were
4950
required by the user's application. Normally using the ``-fgpu-rdc`` option

libc/lib/CMakeLists.txt

-15
Original file line numberDiff line numberDiff line change
@@ -40,20 +40,6 @@ foreach(archive IN ZIP_LISTS
4040
# Add the offloading version of the library for offloading languages. These
4141
# are installed in the standard search path separate from the other libraries.
4242
if(LIBC_TARGET_OS_IS_GPU)
43-
add_gpu_entrypoint_library(
44-
${archive_1}gpu
45-
${archive_1}
46-
DEPENDS
47-
${${archive_2}}
48-
)
49-
set_target_properties(
50-
${archive_1}gpu
51-
PROPERTIES
52-
ARCHIVE_OUTPUT_NAME ${archive_0}gpu-${LIBC_TARGET_ARCHITECTURE}
53-
ARCHIVE_OUTPUT_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
54-
)
55-
list(APPEND added_gpu_archive_targets ${archive_1}gpu)
56-
5743
add_bitcode_entrypoint_library(
5844
${archive_1}bitcode
5945
${archive_1}
@@ -65,7 +51,6 @@ foreach(archive IN ZIP_LISTS
6551
PROPERTIES
6652
OUTPUT_NAME ${archive_1}.bc
6753
)
68-
add_dependencies(${archive_1}gpu ${archive_1}bitcode)
6954
list(APPEND added_gpu_bitcode_targets ${archive_1}bitcode)
7055
endif()
7156
endforeach()

0 commit comments

Comments
 (0)