Skip to content

Commit 35a9f27

Browse files
committed
Add support for CUDA forward compatibility
1 parent ebed710 commit 35a9f27

File tree

11 files changed

+174
-26
lines changed

11 files changed

+174
-26
lines changed

src/driver.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@
2929
#include "utils.h"
3030
#include "xfuncs.h"
3131

32-
#define SONAME_LIBCUDA "libcuda.so.1"
33-
#define SONAME_LIBNVML "libnvidia-ml.so.1"
34-
3532
#define MAX_DEVICES 64
3633
#define REAP_TIMEOUT_MS 10
3734

src/driver.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ SVCXPRT *svcunixfd_create(int, u_int, u_int);
1818

1919
#include "error.h"
2020

21+
#define SONAME_LIBCUDA "libcuda.so.1"
22+
#define SONAME_LIBNVML "libnvidia-ml.so.1"
23+
2124
#define SOCK_CLT 0
2225
#define SOCK_SVC 1
2326

src/nvc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ struct nvc_container_config {
7777
char *bins_dir;
7878
char *libs_dir;
7979
char *libs32_dir;
80+
char *cudart_dir;
8081
char *ldconfig;
8182
};
8283

src/nvc_container.c

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ static char *cgroup_root(char *, char *, const char *);
2626
static char *parse_proc_file(struct error *, const char *, parse_fn, char *, const char *);
2727
static char *find_cgroup_path(struct error *, const struct nvc_container *, const char *);
2828
static char *find_namespace_path(struct error *, const struct nvc_container *, const char *);
29+
static int find_library_paths(struct error *, struct nvc_container *);
2930
static int lookup_owner(struct error *, struct nvc_container *);
3031
static int copy_config(struct error *, struct nvc_container *, const struct nvc_container_config *);
3132

@@ -180,6 +181,48 @@ find_namespace_path(struct error *err, const struct nvc_container *cnt, const ch
180181
return (ns);
181182
}
182183

184+
/*
 * Look for libraries shipped inside the container under
 * <rootfs>/<cudart_dir>/compat/lib*.so.* and record them in cnt->libs and
 * cnt->nlibs.
 *
 * Does nothing and returns 0 when OPT_COMPUTE_LIBS is not set in cnt->flags.
 * Returns 0 on success (including when the glob matches nothing, in which
 * case cnt->libs and cnt->nlibs are not written here) and -1 on failure,
 * with err passed to every failing helper.
 */
static int
185+
find_library_paths(struct error *err, struct nvc_container *cnt)
186+
{
187+
char path[PATH_MAX];
188+
glob_t gl;
189+
int rv = -1;
190+
char **ptr;
191+
192+
if (!(cnt->flags & OPT_COMPUTE_LIBS))
193+
return (0);
194+
195+
/* These early exits return directly: xglob() has not run yet, so gl must not be passed to globfree(). */
if (path_join(err, path, cnt->cfg.rootfs, cnt->cfg.cudart_dir) < 0)
196+
return (-1);
197+
if (path_append(err, path, "compat/lib*.so.*") < 0)
198+
return (-1);
199+
200+
if (xglob(err, path, GLOB_ERR, NULL, &gl) < 0)
201+
goto fail;
202+
if (gl.gl_pathc > 0) {
203+
/* Size the array for the worst case of one entry per glob match; ptr is the write cursor. */
cnt->nlibs = gl.gl_pathc;
204+
cnt->libs = ptr = array_new(err, gl.gl_pathc);
205+
if (cnt->libs == NULL)
206+
goto fail;
207+
208+
for (size_t i = 0; i < gl.gl_pathc; ++i) {
209+
/*
 * Skip the leading strlen(rootfs) characters of the match before resolving it against rootfs.
 * NOTE(review): presumably this yields a rootfs-relative path with symlinks resolved - confirm against path_resolve().
 */
if (path_resolve(err, path, cnt->cfg.rootfs, gl.gl_pathv[i] + strlen(cnt->cfg.rootfs)) < 0)
210+
goto fail;
211+
/* Only compare against the entries stored so far, so a path already recorded is not stored twice. */
if (!str_array_match(path, (const char * const *)cnt->libs, (size_t)(ptr - cnt->libs))) {
212+
log_infof("selecting %s%s", cnt->cfg.rootfs, path);
213+
if ((*ptr++ = xstrdup(err, path)) == NULL)
214+
goto fail;
215+
}
216+
}
217+
/* NOTE(review): presumably drops the slots left unused by skipped duplicates and updates nlibs - confirm against array_pack(). */
array_pack(cnt->libs, &cnt->nlibs);
218+
}
219+
rv = 0;
220+
221+
/* Reached on success as well as on failure once xglob() has been called. */
fail:
222+
globfree(&gl);
223+
return (rv);
224+
}
225+
183226
static int
184227
lookup_owner(struct error *err, struct nvc_container *cnt)
185228
{
@@ -205,6 +248,7 @@ copy_config(struct error *err, struct nvc_container *cnt, const struct nvc_conta
205248
const char *bins_dir = cfg->bins_dir;
206249
const char *libs_dir = cfg->libs_dir;
207250
const char *libs32_dir = cfg->libs32_dir;
251+
const char *cudart_dir = cfg->cudart_dir;
208252
const char *ldconfig = cfg->ldconfig;
209253
char *rootfs;
210254
int multiarch, ret;
@@ -266,6 +310,8 @@ copy_config(struct error *err, struct nvc_container *cnt, const struct nvc_conta
266310
}
267311
}
268312
}
313+
if (cudart_dir == NULL)
314+
cudart_dir = CUDA_RUNTIME_DIR;
269315
if (ldconfig == NULL) {
270316
/*
271317
* Some distributions have a wrapper script around ldconfig to reduce package install time.
@@ -284,6 +330,8 @@ copy_config(struct error *err, struct nvc_container *cnt, const struct nvc_conta
284330
goto fail;
285331
if ((cnt->cfg.libs32_dir = xstrdup(err, libs32_dir)) == NULL)
286332
goto fail;
333+
if ((cnt->cfg.cudart_dir = xstrdup(err, cudart_dir)) == NULL)
334+
goto fail;
287335
if ((cnt->cfg.ldconfig = xstrdup(err, ldconfig)) == NULL)
288336
goto fail;
289337
rv = 0;
@@ -302,7 +350,7 @@ nvc_container_new(struct nvc_context *ctx, const struct nvc_container_config *cf
302350
if (validate_context(ctx) < 0)
303351
return (NULL);
304352
if (validate_args(ctx, cfg != NULL && cfg->pid > 0 && cfg->rootfs != NULL && !str_empty(cfg->rootfs) && cfg->rootfs[0] == '/' &&
305-
!str_empty(cfg->bins_dir) && !str_empty(cfg->libs_dir) && !str_empty(cfg->libs32_dir) && !str_empty(cfg->ldconfig)) < 0)
353+
!str_empty(cfg->bins_dir) && !str_empty(cfg->libs_dir) && !str_empty(cfg->libs32_dir) && !str_empty(cfg->cudart_dir) && !str_empty(cfg->ldconfig)) < 0)
306354
return (NULL);
307355
if (opts == NULL)
308356
opts = default_container_opts;
@@ -322,6 +370,10 @@ nvc_container_new(struct nvc_context *ctx, const struct nvc_container_config *cf
322370
goto fail;
323371
if (lookup_owner(&ctx->err, cnt) < 0)
324372
goto fail;
373+
if (!(flags & OPT_NO_CNTLIBS)) {
374+
if (find_library_paths(&ctx->err, cnt) < 0)
375+
goto fail;
376+
}
325377
if ((cnt->mnt_ns = find_namespace_path(&ctx->err, cnt, "mnt")) == NULL)
326378
goto fail;
327379
if (!(flags & OPT_NO_CGROUPS)) {
@@ -335,6 +387,7 @@ nvc_container_new(struct nvc_context *ctx, const struct nvc_container_config *cf
335387
log_infof("setting bins directory to %s", cnt->cfg.bins_dir);
336388
log_infof("setting libs directory to %s", cnt->cfg.libs_dir);
337389
log_infof("setting libs32 directory to %s", cnt->cfg.libs32_dir);
390+
log_infof("setting cudart directory to %s", cnt->cfg.cudart_dir);
338391
log_infof("setting ldconfig to %s%s", cnt->cfg.ldconfig, (cnt->cfg.ldconfig[0] == '@') ? " (host relative)" : "");
339392
log_infof("setting mount namespace to %s", cnt->mnt_ns);
340393
if (!(flags & OPT_NO_CGROUPS))
@@ -355,8 +408,10 @@ nvc_container_free(struct nvc_container *cnt)
355408
free(cnt->cfg.bins_dir);
356409
free(cnt->cfg.libs_dir);
357410
free(cnt->cfg.libs32_dir);
411+
free(cnt->cfg.cudart_dir);
358412
free(cnt->cfg.ldconfig);
359413
free(cnt->mnt_ns);
360414
free(cnt->dev_cg);
415+
array_free(cnt->libs, cnt->nlibs);
361416
free(cnt);
362417
}

src/nvc_info.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ select_libraries(struct error *err, void *ptr, const char *root, const char *ori
134134
/* Check the driver version. */
135135
if ((rv = str_has_suffix(lib, info->nvrm_version)) == false)
136136
goto done;
137-
if (str_array_match(lib, graphics_libs_compat, nitems(graphics_libs_compat))) {
137+
if (str_array_match_prefix(lib, graphics_libs_compat, nitems(graphics_libs_compat))) {
138138
/* Only choose OpenGL/EGL libraries issued by NVIDIA. */
139139
if ((rv = elftool_has_dependency(&et, "libnvidia-glcore.so")) != false)
140140
goto done;
@@ -394,25 +394,25 @@ lookup_ipcs(struct error *err, struct nvc_driver_info *info, const char *root, i
394394
bool
395395
match_binary_flags(const char *bin, int32_t flags)
396396
{
397-
if ((flags & OPT_UTILITY_BINS) && str_array_match(bin, utility_bins, nitems(utility_bins)))
397+
if ((flags & OPT_UTILITY_BINS) && str_array_match_prefix(bin, utility_bins, nitems(utility_bins)))
398398
return (true);
399-
if ((flags & OPT_COMPUTE_BINS) && str_array_match(bin, compute_bins, nitems(compute_bins)))
399+
if ((flags & OPT_COMPUTE_BINS) && str_array_match_prefix(bin, compute_bins, nitems(compute_bins)))
400400
return (true);
401401
return (false);
402402
}
403403

404404
bool
405405
match_library_flags(const char *lib, int32_t flags)
406406
{
407-
if ((flags & OPT_UTILITY_LIBS) && str_array_match(lib, utility_libs, nitems(utility_libs)))
407+
if ((flags & OPT_UTILITY_LIBS) && str_array_match_prefix(lib, utility_libs, nitems(utility_libs)))
408408
return (true);
409-
if ((flags & OPT_COMPUTE_LIBS) && str_array_match(lib, compute_libs, nitems(compute_libs)))
409+
if ((flags & OPT_COMPUTE_LIBS) && str_array_match_prefix(lib, compute_libs, nitems(compute_libs)))
410410
return (true);
411-
if ((flags & OPT_VIDEO_LIBS) && str_array_match(lib, video_libs, nitems(video_libs)))
411+
if ((flags & OPT_VIDEO_LIBS) && str_array_match_prefix(lib, video_libs, nitems(video_libs)))
412412
return (true);
413-
if ((flags & OPT_GRAPHICS_LIBS) && (str_array_match(lib, graphics_libs, nitems(graphics_libs)) ||
414-
str_array_match(lib, graphics_libs_glvnd, nitems(graphics_libs_glvnd)) ||
415-
str_array_match(lib, graphics_libs_compat, nitems(graphics_libs_compat))))
413+
if ((flags & OPT_GRAPHICS_LIBS) && (str_array_match_prefix(lib, graphics_libs, nitems(graphics_libs)) ||
414+
str_array_match_prefix(lib, graphics_libs_glvnd, nitems(graphics_libs_glvnd)) ||
415+
str_array_match_prefix(lib, graphics_libs_compat, nitems(graphics_libs_compat))))
416416
return (true);
417417
return (false);
418418
}

src/nvc_internal.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
#define NV_UVM_PROC_DRIVER "/proc/driver/nvidia-uvm"
3737
#define NV_APP_PROFILE_DIR "/etc/nvidia/nvidia-application-profiles-rc.d"
3838

39+
#define CUDA_RUNTIME_DIR "/usr/local/cuda"
40+
3941
struct nvc_context {
4042
bool initialized;
4143
struct error err;
@@ -51,6 +53,8 @@ struct nvc_container {
5153
gid_t gid;
5254
char *mnt_ns;
5355
char *dev_cg;
56+
char **libs;
57+
size_t nlibs;
5458
};
5559

5660
enum {

src/nvc_mount.c

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ static void unmount(const char *);
3333
static int setup_cgroup(struct error *, const char *, dev_t);
3434
static int symlink_library(struct error *, const char *, const char *, const char *, uid_t, gid_t);
3535
static int symlink_libraries(struct error *, const struct nvc_container *, const char * const [], size_t);
36+
static void filter_libraries(const struct nvc_driver_info *, char * [], size_t *);
3637

3738
static char **
3839
mount_files(struct error *err, const char *root, const struct nvc_container *cnt, const char *dir, char *paths[], size_t size)
@@ -397,7 +398,7 @@ symlink_libraries(struct error *err, const struct nvc_container *cnt, const char
397398
lib = basename(paths[i]);
398399
if (str_has_prefix(lib, "libcuda.so")) {
399400
/* XXX Many applications wrongly assume that libcuda.so exists (e.g. with dlopen). */
400-
if (symlink_library(err, paths[i], lib, "libcuda.so", cnt->uid, cnt->gid) < 0)
401+
if (symlink_library(err, paths[i], SONAME_LIBCUDA, "libcuda.so", cnt->uid, cnt->gid) < 0)
401402
return (-1);
402403
} else if (str_has_prefix(lib, "libGLX_nvidia.so")) {
403404
/* XXX GLVND requires this symlink for indirect GLX support. */
@@ -408,6 +409,27 @@ symlink_libraries(struct error *err, const struct nvc_container *cnt, const char
408409
return (0);
409410
}
410411

412+
/*
 * Remove from paths[] the entries that should not be used with the driver
 * described by info, then pack the array in place.
 *
 * An entry is kept only when its basename contains ".so." and the run of
 * digits following ".so." differs from the start of info->nvrm_version.
 * Every other entry is set to NULL before array_pack() is called with
 * paths and size.
 */
static void
413+
filter_libraries(const struct nvc_driver_info *info, char * paths[], size_t *size)
414+
{
415+
char *lib, *maj;
416+
417+
/*
418+
* XXX Filter out any library that matches the major version of RM to prevent us from
419+
* running into unsupported configurations (e.g. CUDA compat on Geforce or non-LTS drivers).
420+
*/
421+
for (size_t i = 0; i < *size; ++i) {
422+
lib = basename(paths[i]);
423+
if ((maj = strstr(lib, ".so.")) != NULL) {
424+
maj += strlen(".so.");
425+
/*
 * Compare only as many characters as there are leading digits after ".so.".
 * A non-zero result means the versions differ, so the entry is kept.
 * NOTE(review): this is a prefix comparison - a library major of "41" would also match an
 * nvrm_version starting with "410"; and with no digits the length is 0, strncmp() returns 0,
 * and the entry is dropped. Confirm both are intended.
 */
if (strncmp(info->nvrm_version, maj, strspn(maj, "0123456789")))
426+
continue;
427+
}
428+
/* NOTE(review): the string is not freed here - confirm the caller retains ownership of it. */
paths[i] = NULL;
429+
}
430+
array_pack(paths, size);
431+
}
432+
411433
int
412434
nvc_driver_mount(struct nvc_context *ctx, const struct nvc_container *cnt, const struct nvc_driver_info *info)
413435
{
@@ -423,20 +445,22 @@ nvc_driver_mount(struct nvc_context *ctx, const struct nvc_container *cnt, const
423445
if (ns_enter(&ctx->err, cnt->mnt_ns, CLONE_NEWNS) < 0)
424446
return (-1);
425447

426-
nmnt = 2 + info->nbins + info->nlibs + info->nlibs32 + info->nipcs + info->ndevs;
448+
nmnt = 2 + info->nbins + info->nlibs + cnt->nlibs + info->nlibs32 + info->nipcs + info->ndevs;
427449
mnt = ptr = (const char **)array_new(&ctx->err, nmnt);
428450
if (mnt == NULL)
429451
goto fail;
430452

431453
/* Procfs mount */
432454
if ((*ptr++ = mount_procfs(&ctx->err, ctx->cfg.root, cnt)) == NULL)
433455
goto fail;
456+
434457
/* Application profile mount */
435458
if (cnt->flags & OPT_GRAPHICS_LIBS) {
436459
if ((*ptr++ = mount_app_profile(&ctx->err, cnt)) == NULL)
437460
goto fail;
438461
}
439-
/* Binary and library mounts */
462+
463+
/* Host binary and library mounts */
440464
if (info->bins != NULL && info->nbins > 0) {
441465
if ((tmp = (const char **)mount_files(&ctx->err, ctx->cfg.root, cnt, cnt->cfg.bins_dir, info->bins, info->nbins)) == NULL)
442466
goto fail;
@@ -457,6 +481,24 @@ nvc_driver_mount(struct nvc_context *ctx, const struct nvc_container *cnt, const
457481
}
458482
if (symlink_libraries(&ctx->err, cnt, mnt, (size_t)(ptr - mnt)) < 0)
459483
goto fail;
484+
485+
/* Container library mounts */
486+
if (cnt->libs != NULL && cnt->nlibs > 0) {
487+
size_t nlibs = cnt->nlibs;
488+
char **libs = array_copy(&ctx->err, (const char * const *)cnt->libs, cnt->nlibs);
489+
if (libs == NULL)
490+
goto fail;
491+
492+
filter_libraries(info, libs, &nlibs);
493+
if ((tmp = (const char **)mount_files(&ctx->err, cnt->cfg.rootfs, cnt, cnt->cfg.libs_dir, libs, nlibs)) == NULL) {
494+
free(libs);
495+
goto fail;
496+
}
497+
ptr = array_append(ptr, tmp, array_size(tmp));
498+
free(tmp);
499+
free(libs);
500+
}
501+
460502
/* IPC mounts */
461503
for (size_t i = 0; i < info->nipcs; ++i) {
462504
/* XXX Only utility libraries require persistenced IPC, everything else is compute only. */
@@ -468,6 +510,7 @@ nvc_driver_mount(struct nvc_context *ctx, const struct nvc_container *cnt, const
468510
if ((*ptr++ = mount_ipc(&ctx->err, ctx->cfg.root, cnt, info->ipcs[i])) == NULL)
469511
goto fail;
470512
}
513+
471514
/* Device mounts */
472515
for (size_t i = 0; i < info->ndevs; ++i) {
473516
/* XXX Only compute libraries require specific devices (e.g. UVM). */

src/options.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,18 @@ enum {
5555
OPT_STANDALONE = 1 << 1,
5656
OPT_NO_CGROUPS = 1 << 2,
5757
OPT_NO_DEVBIND = 1 << 3,
58-
OPT_UTILITY_LIBS = 1 << 4,
59-
OPT_COMPUTE_LIBS = 1 << 5,
60-
OPT_VIDEO_LIBS = 1 << 6,
61-
OPT_GRAPHICS_LIBS = 1 << 7,
62-
OPT_DISPLAY = 1 << 8,
63-
OPT_UTILITY_BINS = 1 << 9,
64-
OPT_COMPUTE_BINS = 1 << 10,
58+
OPT_NO_CNTLIBS = 1 << 4,
59+
OPT_UTILITY_LIBS = 1 << 5,
60+
OPT_COMPUTE_LIBS = 1 << 6,
61+
OPT_VIDEO_LIBS = 1 << 7,
62+
OPT_GRAPHICS_LIBS = 1 << 8,
63+
OPT_DISPLAY = 1 << 9,
64+
OPT_UTILITY_BINS = 1 << 10,
65+
OPT_COMPUTE_BINS = 1 << 11,
6566
#if defined(__powerpc64__) /* ppc64le doesn't support compat32. */
6667
OPT_COMPAT32 = 1 << 0,
6768
#else
68-
OPT_COMPAT32 = 1 << 11,
69+
OPT_COMPAT32 = 1 << 12,
6970
#endif /* defined(__powerpc64__) */
7071
};
7172

@@ -74,6 +75,7 @@ static const struct option container_opts[] = {
7475
{"standalone", OPT_STANDALONE},
7576
{"no-cgroups", OPT_NO_CGROUPS},
7677
{"no-devbind", OPT_NO_DEVBIND},
78+
{"no-cntlibs", OPT_NO_CNTLIBS},
7779
{"utility", OPT_UTILITY_BINS|OPT_UTILITY_LIBS},
7880
{"compute", OPT_COMPUTE_BINS|OPT_COMPUTE_LIBS},
7981
{"video", OPT_VIDEO_LIBS|OPT_COMPUTE_LIBS},

0 commit comments

Comments
 (0)