Skip to content

Commit 550b83d

Browse files
committed
Revert "[libc] Remove 'packaged' GPU build support (#100208)"
Summary: I forgot that the OpenMP tests still look for this, reverting for now until I can make a fix. This reverts commit c1c6ed8.
1 parent 9914609 commit 550b83d

File tree

4 files changed

+132
-8
lines changed

4 files changed

+132
-8
lines changed

libc/cmake/modules/LLVMLibCLibraryRules.cmake

+91
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,97 @@ function(get_all_object_file_deps result fq_deps_list)
8383
set(${result} ${all_deps} PARENT_SCOPE)
8484
endfunction()
8585

86+
# A rule to build a library from a collection of entrypoint objects and bundle
87+
# it into a GPU fatbinary. Usage is the same as 'add_entrypoint_library'.
88+
# Usage:
89+
# add_gpu_entrypoint_library(<target_name> <base_target_name>
90+
# DEPENDS <list of add_entrypoint_object targets>
91+
# )
92+
function(add_gpu_entrypoint_library target_name base_target_name)
  # Arguments:
  #   target_name      - Name of the static library target created by this rule.
  #   base_target_name - Target that every packaging command and every fat
  #                      binary object is made to depend on.
  #   DEPENDS          - List of 'add_entrypoint_object' targets to bundle.
  cmake_parse_arguments(
    "ENTRYPOINT_LIBRARY"
    "" # No optional arguments
    "" # No single value arguments
    "DEPENDS" # Multi-value arguments
    ${ARGN}
  )
  if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
    message(FATAL_ERROR "'add_gpu_entrypoint_library' target requires a "
                        "DEPENDS list of 'add_entrypoint_object' targets.")
  endif()

  # The image description handed to the packager only depends on the GPU
  # architecture, so it is computed once instead of once per entrypoint. Fail
  # at configure time for any other architecture rather than emitting a
  # packager command line without an '--image=' prefix.
  if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
    set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63)
  elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
    set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa)
  else()
    message(FATAL_ERROR "'add_gpu_entrypoint_library' only supports the NVPTX "
                        "and AMDGPU target architectures.")
  endif()

  get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
  get_all_object_file_deps(all_deps "${fq_deps_list}")

  # The GPU 'libc' needs to be exported in a format that can be linked with
  # offloading languages like OpenMP or CUDA. This wraps every GPU object into
  # a fat binary and adds them to a static library.
  set(binary_dir "${CMAKE_CURRENT_BINARY_DIR}/binary")
  set(stubs_dir "${CMAKE_CURRENT_BINARY_DIR}/stubs")
  set(objects "")
  foreach(dep IN LISTS all_deps)
    # Expands to the object files of 'dep' when it names an existing target and
    # to nothing otherwise.
    set(object $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>)

    # The entrypoint name is whatever follows the last '.' in the fully
    # qualified target name. If there is no '.', FIND yields -1 and the whole
    # target name is used.
    string(FIND "${dep}" "." last_dot_loc REVERSE)
    math(EXPR name_loc "${last_dot_loc} + 1")
    string(SUBSTRING "${dep}" ${name_loc} -1 name)

    set(gpubin_file "${binary_dir}/${name}.gpubin")
    set(stub_file "${stubs_dir}/${name}.cpp")

    # Use the 'clang-offload-packager' to merge these files into a binary blob.
    # Generator expressions in COMMENT are only evaluated by CMake 3.26 and
    # newer, so the message refers to the entrypoint by name instead.
    add_custom_command(
      OUTPUT "${gpubin_file}"
      COMMAND ${CMAKE_COMMAND} -E make_directory "${binary_dir}"
      COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
              "${prefix},file=$<JOIN:${object},,file=>" -o "${gpubin_file}"
      DEPENDS ${dep} ${base_target_name}
      COMMENT "Packaging LLVM offloading binary for '${name}'"
      VERBATIM
    )
    add_custom_target(${dep}.__gpubin__ DEPENDS ${dep} "${gpubin_file}")
    if(TARGET clang-offload-packager)
      add_dependencies(${dep}.__gpubin__ clang-offload-packager)
    endif()

    # CMake does not permit setting the name on object files. In order to have
    # human readable names we create an empty stub file with the entrypoint
    # name. This empty file will then have the created binary blob embedded.
    add_custom_command(
      OUTPUT "${stub_file}"
      COMMAND ${CMAKE_COMMAND} -E make_directory "${stubs_dir}"
      COMMAND ${CMAKE_COMMAND} -E touch "${stub_file}"
      DEPENDS ${dep} ${dep}.__gpubin__ ${base_target_name}
      VERBATIM
    )
    add_custom_target(${dep}.__stub__
      DEPENDS ${dep}.__gpubin__ "${stub_file}")

    add_library(${dep}.__fatbin__
      EXCLUDE_FROM_ALL OBJECT
      "${stub_file}"
    )

    # This is always compiled for the LLVM host triple instead of the native GPU
    # triple that is used by default in the build.
    target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib)
    target_compile_options(${dep}.__fatbin__ PRIVATE
      --target=${LLVM_HOST_TRIPLE}
      "SHELL:-Xclang -fembed-offload-object=${gpubin_file}")
    add_dependencies(${dep}.__fatbin__
      ${dep} ${dep}.__stub__ ${dep}.__gpubin__ ${base_target_name})

    # Collect the newly created fat binaries containing embedded device code.
    list(APPEND objects $<TARGET_OBJECTS:${dep}.__fatbin__>)
  endforeach()

  add_library(
    ${target_name}
    STATIC
    ${objects}
  )
  # NOTE(review): CMake places STATIC libraries according to
  # ARCHIVE_OUTPUT_DIRECTORY, so this property likely has no effect on the
  # archive location. It is kept as-is so that callers which do not set
  # ARCHIVE_OUTPUT_DIRECTORY themselves see no change in output location;
  # confirm the intent before switching it.
  set_target_properties(${target_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR})
endfunction()
176+
86177
# A rule to build a library from a collection of entrypoint objects and bundle
87178
# it in a single LLVM-IR bitcode file.
88179
# Usage:

libc/docs/gpu/building.rst

+19
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,25 @@ Build overview
151151
Once installed, the GPU build will create several files used for different
152152
targets. This section will briefly describe their purpose.
153153

154+
**lib/<host-triple>/libcgpu-amdgpu.a or lib/libcgpu-amdgpu.a**
155+
A static library containing fat binaries supporting AMD GPUs. These are built
156+
using the support described in the `clang documentation
157+
<https://clang.llvm.org/docs/OffloadingDesign.html>`_. These are intended to
158+
be static libraries included natively for offloading languages like CUDA, HIP,
159+
or OpenMP. This implements the standard C library.
160+
161+
**lib/<host-triple>/libmgpu-amdgpu.a or lib/libmgpu-amdgpu.a**
162+
A static library containing fat binaries that implement the standard math
163+
library for AMD GPUs.
164+
165+
**lib/<host-triple>/libcgpu-nvptx.a or lib/libcgpu-nvptx.a**
166+
A static library containing fat binaries that implement the standard C library
167+
for NVIDIA GPUs.
168+
169+
**lib/<host-triple>/libmgpu-nvptx.a or lib/libmgpu-nvptx.a**
170+
A static library containing fat binaries that implement the standard math
171+
library for NVIDIA GPUs.
172+
154173
**include/<target-triple>**
155174
The include directory where all of the generated headers for the target will
156175
go. These definitions are strictly for the GPU when being targeted directly.

libc/docs/gpu/using.rst

+7-8
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,16 @@ described in the `clang documentation
3434
by the OpenMP toolchain, but is currently opt-in for the CUDA and HIP toolchains
3535
through the ``--offload-new-driver`` and ``-fgpu-rdc`` flags.
3636

37-
In order or link the GPU runtime, we simply pass this library to the embedded
38-
device linker job. This can be done using the ``-Xoffload-linker`` option, which
39-
forwards an argument to a ``clang`` job used to create the final GPU executable.
40-
The toolchain should pick up the C libraries automatically in most cases, so
41-
this shouldn't be necessary.
37+
The installation should contain a static library called ``libcgpu-amdgpu.a`` or
38+
``libcgpu-nvptx.a`` depending on which GPU architectures your build targeted.
39+
These contain fat binaries compatible with the offloading toolchain such that
40+
they can be used directly.
4241

4342
.. code-block:: sh
4443
45-
$> clang openmp.c -fopenmp --offload-arch=gfx90a -Xoffload-linker -lc
46-
$> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
47-
$> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
44+
$> clang openmp.c -fopenmp --offload-arch=gfx90a -lcgpu-amdgpu
45+
$> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -lcgpu-nvptx
46+
$> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -lcgpu-amdgpu
4847
4948
This will automatically link in the needed function definitions if they were
5049
required by the user's application. Normally using the ``-fgpu-rdc`` option

libc/lib/CMakeLists.txt

+15
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,20 @@ foreach(archive IN ZIP_LISTS
4040
# Add the offloading version of the library for offloading languages. These
4141
# are installed in the standard search path separate from the other libraries.
4242
if(LIBC_TARGET_OS_IS_GPU)
43+
add_gpu_entrypoint_library(
44+
${archive_1}gpu
45+
${archive_1}
46+
DEPENDS
47+
${${archive_2}}
48+
)
49+
set_target_properties(
50+
${archive_1}gpu
51+
PROPERTIES
52+
ARCHIVE_OUTPUT_NAME ${archive_0}gpu-${LIBC_TARGET_ARCHITECTURE}
53+
ARCHIVE_OUTPUT_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
54+
)
55+
list(APPEND added_gpu_archive_targets ${archive_1}gpu)
56+
4357
add_bitcode_entrypoint_library(
4458
${archive_1}bitcode
4559
${archive_1}
@@ -51,6 +65,7 @@ foreach(archive IN ZIP_LISTS
5165
PROPERTIES
5266
OUTPUT_NAME ${archive_1}.bc
5367
)
68+
add_dependencies(${archive_1}gpu ${archive_1}bitcode)
5469
list(APPEND added_gpu_bitcode_targets ${archive_1}bitcode)
5570
endif()
5671
endforeach()

0 commit comments

Comments
 (0)