Skip to content

Commit d6326e7

Browse files
authored
Merge pull request #1168 from ArangoGutierrez/b/5363680
Fix leaking of tmpfs mount in CDI mode
2 parents 7551ed2 + 6d68eb7 commit d6326e7

File tree

2 files changed

+102
-15
lines changed

2 files changed

+102
-15
lines changed

cmd/nvidia-cdi-hook/disable-device-node-modification/params_linux.go

Lines changed: 61 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,39 +20,57 @@
2020
package disabledevicenodemodification
2121

2222
import (
23+
"errors"
2324
"fmt"
2425
"os"
2526
"path/filepath"
2627

28+
securejoin "github.com/cyphar/filepath-securejoin"
2729
"github.com/opencontainers/runc/libcontainer/utils"
2830
"golang.org/x/sys/unix"
2931
)
3032

3133
func createParamsFileInContainer(containerRootDirPath string, contents []byte) error {
32-
tmpRoot, err := os.MkdirTemp("", "nvct-empty-dir*")
33-
if err != nil {
34-
return fmt.Errorf("failed to create temp root: %w", err)
34+
hookScratchDirPath := "/var/run/nvidia-ctk-hook"
35+
if err := utils.MkdirAllInRoot(containerRootDirPath, hookScratchDirPath, 0755); err != nil {
36+
return fmt.Errorf("error creating hook scratch folder: %w", err)
3537
}
3638

37-
if err := createTmpFs(tmpRoot, len(contents)); err != nil {
38-
return fmt.Errorf("failed to create tmpfs mount for params file: %w", err)
39-
}
39+
err := utils.WithProcfd(containerRootDirPath, hookScratchDirPath, func(hookScratchDirFdPath string) error {
40+
return createTmpFs(hookScratchDirFdPath, len(contents))
4041

41-
modifiedParamsFile, err := os.OpenFile(filepath.Join(tmpRoot, "nvct-params"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0444)
42+
})
4243
if err != nil {
43-
return fmt.Errorf("failed to open modified params file: %w", err)
44+
return fmt.Errorf("failed to create tmpfs mount for params file: %w", err)
4445
}
45-
defer modifiedParamsFile.Close()
4646

47-
if _, err := modifiedParamsFile.Write(contents); err != nil {
48-
return fmt.Errorf("failed to write temporary params file: %w", err)
47+
modifiedParamsFilePath := filepath.Join(hookScratchDirPath, "nvct-params")
48+
if _, err := createFileInRoot(containerRootDirPath, modifiedParamsFilePath, 0444); err != nil {
49+
return fmt.Errorf("error creating modified params file: %w", err)
4950
}
5051

51-
err = utils.WithProcfd(containerRootDirPath, nvidiaDriverParamsPath, func(nvidiaDriverParamsFdPath string) error {
52-
return unix.Mount(modifiedParamsFile.Name(), nvidiaDriverParamsFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "")
52+
err = utils.WithProcfd(containerRootDirPath, modifiedParamsFilePath, func(modifiedParamsFileFdPath string) error {
53+
modifiedParamsFile, err := os.OpenFile(modifiedParamsFileFdPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0444)
54+
if err != nil {
55+
return fmt.Errorf("failed to open modified params file: %w", err)
56+
}
57+
defer modifiedParamsFile.Close()
58+
59+
if _, err := modifiedParamsFile.Write(contents); err != nil {
60+
return fmt.Errorf("failed to write temporary params file: %w", err)
61+
}
62+
63+
err = utils.WithProcfd(containerRootDirPath, nvidiaDriverParamsPath, func(nvidiaDriverParamsFdPath string) error {
64+
return unix.Mount(modifiedParamsFileFdPath, nvidiaDriverParamsFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "")
65+
})
66+
if err != nil {
67+
return fmt.Errorf("failed to mount modified params file: %w", err)
68+
}
69+
70+
return nil
5371
})
5472
if err != nil {
55-
return fmt.Errorf("failed to mount modified params file: %w", err)
73+
return err
5674
}
5775

5876
return nil
@@ -61,3 +79,32 @@ func createParamsFileInContainer(containerRootDirPath string, contents []byte) e
6179
func createTmpFs(target string, size int) error {
6280
return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size))
6381
}
82+
83+
// TODO(ArangoGutierrez): This function also exists in internal/ldconfig we should move this to a separate package.
84+
func createFileInRoot(containerRootDirPath string, destinationPath string, mode os.FileMode) (string, error) {
85+
dest, err := securejoin.SecureJoin(containerRootDirPath, destinationPath)
86+
if err != nil {
87+
return "", err
88+
}
89+
// Make the parent directory.
90+
destDir, destBase := filepath.Split(dest)
91+
destDirFd, err := utils.MkdirAllInRootOpen(containerRootDirPath, destDir, 0755)
92+
if err != nil {
93+
return "", fmt.Errorf("error creating parent dir: %w", err)
94+
}
95+
defer destDirFd.Close()
96+
// Make the target file. We want to avoid opening any file that is
97+
// already there because it could be a "bad" file like an invalid
98+
// device or hung tty that might cause a DoS, so we use mknodat.
99+
// destBase does not contain any "/" components, and mknodat does
100+
// not follow trailing symlinks, so we can safely just call mknodat
101+
// here.
102+
if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|uint32(mode), 0); err != nil {
103+
// If we get EEXIST, there was already an inode there and
104+
// we can consider that a success.
105+
if !errors.Is(err, unix.EEXIST) {
106+
return "", fmt.Errorf("error creating empty file: %w", err)
107+
}
108+
}
109+
return dest, nil
110+
}

tests/e2e/nvidia-container-toolkit_test.go

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
217217
})
218218
})
219219

220-
Describe("Disabling device node creation", Ordered, func() {
220+
When("Disabling device node creation", Ordered, func() {
221221
BeforeAll(func(ctx context.Context) {
222222
_, _, err := runner.Run("docker pull ubuntu")
223223
Expect(err).ToNot(HaveOccurred())
@@ -257,4 +257,44 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
257257
Expect(libs).To(ContainElements([]string{"libcuda.so", "libcuda.so.1"}))
258258
})
259259
})
260+
261+
When("Running containers with shared mount propagation", Ordered, func() {
262+
var mountsBefore string
263+
var tmpDirPath string
264+
265+
BeforeAll(func(ctx context.Context) {
266+
_, _, err := runner.Run("docker pull ubuntu")
267+
Expect(err).ToNot(HaveOccurred())
268+
269+
tmpDirPath = GinkgoT().TempDir()
270+
_, _, err = runner.Run("mkdir -p " + tmpDirPath)
271+
Expect(err).ToNot(HaveOccurred())
272+
273+
output, _, err := runner.Run("mount | sort")
274+
Expect(err).ToNot(HaveOccurred())
275+
Expect(output).ToNot(BeEmpty())
276+
mountsBefore = output
277+
})
278+
279+
AfterEach(func() {
280+
mountsAfter, _, err := runner.Run("mount | sort")
281+
Expect(err).ToNot(HaveOccurred())
282+
Expect(mountsAfter).To(Equal(mountsBefore))
283+
})
284+
285+
It("should not leak mounts when using the nvidia-container-runtime-hook", Label("legacy"), func(ctx context.Context) {
286+
_, _, err := runner.Run("docker run --rm -i --runtime=runc -e NVIDIA_VISIBLE_DEVICES=all -e NVIDIA_DRIVER_CAPABILITIES=all --mount type=bind,source=" + tmpDirPath + ",target=/empty,bind-propagation=shared ubuntu true")
287+
Expect(err).ToNot(HaveOccurred())
288+
})
289+
290+
It("should not leak mounts when using the nvidia-container-runtime", func(ctx context.Context) {
291+
_, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all -e NVIDIA_DRIVER_CAPABILITIES=all --mount type=bind,source=" + tmpDirPath + ",target=/empty,bind-propagation=shared ubuntu true")
292+
Expect(err).ToNot(HaveOccurred())
293+
})
294+
295+
It("should not leak mounts when using CDI mode", func(ctx context.Context) {
296+
_, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all -e NVIDIA_DRIVER_CAPABILITIES=runtime.nvidia.com/gpu=all --mount type=bind,source=" + tmpDirPath + ",target=/empty,bind-propagation=shared ubuntu true")
297+
Expect(err).ToNot(HaveOccurred())
298+
})
299+
})
260300
})

0 commit comments

Comments
 (0)