Skip to content

Commit de29ef3

Browse files
sercherjerboaa
authored and
Dmitry Chuyko
committed
8343191: Cgroup v1 subsystem fails to set subsystem path
Co-authored-by: Severin Gehwolf <[email protected]> Reviewed-by: sgehwolf, mbaesken
1 parent 75f028b commit de29ef3

File tree

9 files changed

+488
-42
lines changed

9 files changed

+488
-42
lines changed

src/hotspot/os/linux/cgroupUtil_linux.cpp

+23-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, Red Hat, Inc.
2+
* Copyright (c) 2024, 2025, Red Hat, Inc.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -49,20 +49,27 @@ int CgroupUtil::processor_count(CgroupCpuController* cpu_ctrl, int host_cpus) {
4949
}
5050

5151
void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
52+
assert(mem->cgroup_path() != nullptr, "invariant");
53+
if (strstr(mem->cgroup_path(), "../") != nullptr) {
54+
log_warning(os, container)("Cgroup memory controller path at '%s' seems to have moved to '%s', detected limits won't be accurate",
55+
mem->mount_point(), mem->cgroup_path());
56+
mem->set_subsystem_path("/");
57+
return;
58+
}
5259
if (!mem->needs_hierarchy_adjustment()) {
5360
// nothing to do
5461
return;
5562
}
5663
log_trace(os, container)("Adjusting controller path for memory: %s", mem->subsystem_path());
57-
assert(mem->cgroup_path() != nullptr, "invariant");
5864
char* orig = os::strdup(mem->cgroup_path());
5965
char* cg_path = os::strdup(orig);
6066
char* last_slash;
6167
assert(cg_path[0] == '/', "cgroup path must start with '/'");
6268
julong phys_mem = os::Linux::physical_memory();
6369
char* limit_cg_path = nullptr;
6470
jlong limit = mem->read_memory_limit_in_bytes(phys_mem);
65-
jlong lowest_limit = phys_mem;
71+
jlong lowest_limit = limit < 0 ? phys_mem : limit;
72+
julong orig_limit = ((julong)lowest_limit) != phys_mem ? lowest_limit : phys_mem;
6673
while ((last_slash = strrchr(cg_path, '/')) != cg_path) {
6774
*last_slash = '\0'; // strip path
6875
// update to shortened path and try again
@@ -83,7 +90,7 @@ void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
8390
limit_cg_path = os::strdup("/");
8491
}
8592
assert(lowest_limit >= 0, "limit must be positive");
86-
if ((julong)lowest_limit != phys_mem) {
93+
if ((julong)lowest_limit != orig_limit) {
8794
// we've found a lower limit anywhere in the hierarchy,
8895
// set the path to the limit path
8996
assert(limit_cg_path != nullptr, "limit path must be set");
@@ -93,6 +100,7 @@ void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
93100
mem->subsystem_path(),
94101
lowest_limit);
95102
} else {
103+
log_trace(os, container)("Lowest limit was: " JLONG_FORMAT, lowest_limit);
96104
log_trace(os, container)("No lower limit found for memory in hierarchy %s, "
97105
"adjusting to original path %s",
98106
mem->mount_point(), orig);
@@ -104,19 +112,26 @@ void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
104112
}
105113

106114
void CgroupUtil::adjust_controller(CgroupCpuController* cpu) {
115+
assert(cpu->cgroup_path() != nullptr, "invariant");
116+
if (strstr(cpu->cgroup_path(), "../") != nullptr) {
117+
log_warning(os, container)("Cgroup cpu controller path at '%s' seems to have moved to '%s', detected limits won't be accurate",
118+
cpu->mount_point(), cpu->cgroup_path());
119+
cpu->set_subsystem_path("/");
120+
return;
121+
}
107122
if (!cpu->needs_hierarchy_adjustment()) {
108123
// nothing to do
109124
return;
110125
}
111126
log_trace(os, container)("Adjusting controller path for cpu: %s", cpu->subsystem_path());
112-
assert(cpu->cgroup_path() != nullptr, "invariant");
113127
char* orig = os::strdup(cpu->cgroup_path());
114128
char* cg_path = os::strdup(orig);
115129
char* last_slash;
116130
assert(cg_path[0] == '/', "cgroup path must start with '/'");
117131
int host_cpus = os::Linux::active_processor_count();
118132
int cpus = CgroupUtil::processor_count(cpu, host_cpus);
119-
int lowest_limit = host_cpus;
133+
int lowest_limit = cpus < host_cpus ? cpus: host_cpus;
134+
int orig_limit = lowest_limit != host_cpus ? lowest_limit : host_cpus;
120135
char* limit_cg_path = nullptr;
121136
while ((last_slash = strrchr(cg_path, '/')) != cg_path) {
122137
*last_slash = '\0'; // strip path
@@ -138,7 +153,7 @@ void CgroupUtil::adjust_controller(CgroupCpuController* cpu) {
138153
limit_cg_path = os::strdup(cg_path);
139154
}
140155
assert(lowest_limit >= 0, "limit must be positive");
141-
if (lowest_limit != host_cpus) {
156+
if (lowest_limit != orig_limit) {
142157
// we've found a lower limit anywhere in the hierarchy,
143158
// set the path to the limit path
144159
assert(limit_cg_path != nullptr, "limit path must be set");
@@ -148,6 +163,7 @@ void CgroupUtil::adjust_controller(CgroupCpuController* cpu) {
148163
cpu->subsystem_path(),
149164
lowest_limit);
150165
} else {
166+
log_trace(os, container)("Lowest limit was: %d", lowest_limit);
151167
log_trace(os, container)("No lower limit found for cpu in hierarchy %s, "
152168
"adjusting to original path %s",
153169
cpu->mount_point(), orig);

src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp

+63-14
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -37,6 +37,47 @@
3737
/*
3838
* Set directory to subsystem specific files based
3939
* on the contents of the mountinfo and cgroup files.
40+
*
41+
* The method determines whether it runs in
42+
* - host mode
43+
* - container mode
44+
*
45+
* In the host mode, _root is equal to "/" and
46+
* the subsystem path is equal to the _mount_point path
47+
* joined with cgroup_path.
48+
*
49+
* In the container mode, there are two possibilities:
50+
* - private namespace (cgroupns=private)
51+
* - host namespace (cgroupns=host, default mode in cgroup V1 hosts)
52+
*
53+
* Private namespace is equivalent to the host mode, i.e.
54+
* the subsystem path is set by concatenating
55+
* _mount_point and cgroup_path.
56+
*
57+
* In the host namespace, _root is equal to host's cgroup path
58+
* of the control group to which the containerized process
59+
* belongs at the moment of creation. The mountinfo and
60+
* cgroup files are mirrored from the host, while the subsystem
61+
* specific files are mapped directly at _mount_point, i.e.
62+
* at /sys/fs/cgroup/<controller>/, the subsystem path is
63+
* then set equal to _mount_point.
64+
*
65+
* A special case of the subsystem path is when a cgroup path
66+
* includes a subgroup, when a containerized process was associated
67+
* with an existing cgroup that is different from the cgroup
68+
* in which the process has been created.
69+
* Here, the _root is equal to the host's initial cgroup path,
70+
* cgroup_path will be equal to host's new cgroup path.
71+
* As host cgroup hierarchies are not accessible in the container,
72+
* it needs to be determined which part of the cgroup path
73+
* is accessible inside the container, i.e. mapped under
74+
* /sys/fs/cgroup/<controller>/<subgroup>.
75+
* In Docker default setup, host's cgroup path can be
76+
* of the form: /docker/<CONTAINER_ID>/<subgroup>,
77+
* from which only <subgroup> is mapped.
78+
* The method trims the cgroup path from the left until the subgroup
79+
* component is found. The subsystem path will be set to
80+
* the _mount_point joined with the subgroup path.
4081
*/
4182
void CgroupV1Controller::set_subsystem_path(const char* cgroup_path) {
4283
if (_cgroup_path != nullptr) {
@@ -49,28 +90,36 @@ void CgroupV1Controller::set_subsystem_path(const char* cgroup_path) {
4990
_cgroup_path = os::strdup(cgroup_path);
5091
stringStream ss;
5192
if (_root != nullptr && cgroup_path != nullptr) {
93+
ss.print_raw(_mount_point);
5294
if (strcmp(_root, "/") == 0) {
53-
ss.print_raw(_mount_point);
95+
// host processes and containers with cgroupns=private
5496
if (strcmp(cgroup_path,"/") != 0) {
5597
ss.print_raw(cgroup_path);
5698
}
57-
_path = os::strdup(ss.base());
5899
} else {
59-
if (strcmp(_root, cgroup_path) == 0) {
60-
ss.print_raw(_mount_point);
61-
_path = os::strdup(ss.base());
62-
} else {
63-
char *p = strstr((char*)cgroup_path, _root);
64-
if (p != nullptr && p == _root) {
65-
if (strlen(cgroup_path) > strlen(_root)) {
66-
ss.print_raw(_mount_point);
67-
const char* cg_path_sub = cgroup_path + strlen(_root);
68-
ss.print_raw(cg_path_sub);
69-
_path = os::strdup(ss.base());
100+
// containers with cgroupns=host, default setting is _root==cgroup_path
101+
if (strcmp(_root, cgroup_path) != 0) {
102+
if (*cgroup_path != '\0' && strcmp(cgroup_path, "/") != 0) {
103+
// When moved to a subgroup, or between subgroups, the path suffix will change.
104+
const char *suffix = cgroup_path;
105+
while (suffix != nullptr) {
106+
stringStream pp;
107+
pp.print_raw(_mount_point);
108+
pp.print_raw(suffix);
109+
if (os::file_exists(pp.base())) {
110+
ss.print_raw(suffix);
111+
if (suffix != cgroup_path) {
112+
log_trace(os, container)("set_subsystem_path: cgroup v1 path reduced to: %s.", suffix);
113+
}
114+
break;
115+
}
116+
log_trace(os, container)("set_subsystem_path: skipped non-existent directory: %s.", suffix);
117+
suffix = strchr(suffix + 1, '/');
70118
}
71119
}
72120
}
73121
}
122+
_path = os::strdup(ss.base());
74123
}
75124
}
76125

src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2020, 2022, Red Hat Inc.
2+
* Copyright (c) 2020, 2025, Red Hat Inc.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -292,6 +292,10 @@ jlong memory_swap_limit_value(CgroupV2Controller* ctrl) {
292292
}
293293

294294
void CgroupV2Controller::set_subsystem_path(const char* cgroup_path) {
295+
if (_cgroup_path != nullptr) {
296+
os::free(_cgroup_path);
297+
}
298+
_cgroup_path = os::strdup(cgroup_path);
295299
if (_path != nullptr) {
296300
os::free(_path);
297301
}

src/java.base/linux/classes/jdk/internal/platform/cgroupv1/CgroupV1SubsystemController.java

+27-15
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,9 @@
2525

2626
package jdk.internal.platform.cgroupv1;
2727

28+
import java.lang.System.Logger.Level;
29+
import java.nio.file.Path;
30+
import java.nio.file.Files;
2831
import jdk.internal.platform.CgroupSubsystem;
2932
import jdk.internal.platform.CgroupSubsystemController;
3033

@@ -44,27 +47,36 @@ public CgroupV1SubsystemController(String root, String mountPoint) {
4447

4548
public void setPath(String cgroupPath) {
4649
if (root != null && cgroupPath != null) {
50+
String path = mountPoint;
4751
if (root.equals("/")) {
52+
// host processes and containers with cgroupns=private
4853
if (!cgroupPath.equals("/")) {
49-
path = mountPoint + cgroupPath;
54+
path += cgroupPath;
5055
}
51-
else {
52-
path = mountPoint;
53-
}
54-
}
55-
else {
56-
if (root.equals(cgroupPath)) {
57-
path = mountPoint;
58-
}
59-
else {
60-
if (cgroupPath.startsWith(root)) {
61-
if (cgroupPath.length() > root.length()) {
62-
String cgroupSubstr = cgroupPath.substring(root.length());
63-
path = mountPoint + cgroupSubstr;
56+
} else {
57+
// containers with cgroupns=host, default setting is _root==cgroup_path
58+
if (!cgroupPath.equals(root)) {
59+
if (!cgroupPath.equals("") && !cgroupPath.equals("/")) {
60+
// When moved to a subgroup, between subgroups, the path suffix will change.
61+
Path cgp = Path.of(cgroupPath);
62+
int nameCount = cgp.getNameCount();
63+
for (int i=0; i < nameCount; i++) {
64+
Path dir = Path.of(mountPoint, cgp.toString());
65+
if (Files.isDirectory(dir)) {
66+
path = dir.toString();
67+
if (i > 0) {
68+
System.getLogger("jdk.internal.platform").log(Level.DEBUG, String.format(
69+
"Cgroup v1 path reduced to: %s.", cgp));
70+
}
71+
break;
72+
}
73+
int currentNameCount = cgp.getNameCount();
74+
cgp = (currentNameCount > 1) ? cgp.subpath(1, currentNameCount) : Path.of("");
6475
}
6576
}
6677
}
6778
}
79+
this.path = path;
6880
}
6981
}
7082

test/hotspot/gtest/runtime/test_cgroupSubsystem_linux.cpp

+76-2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
#include "runtime/os.hpp"
2727
#include "cgroupSubsystem_linux.hpp"
28+
#include "cgroupUtil_linux.hpp"
2829
#include "cgroupV1Subsystem_linux.hpp"
2930
#include "cgroupV2Subsystem_linux.hpp"
3031
#include "unittest.hpp"
@@ -432,9 +433,16 @@ TEST(cgroupTest, set_cgroupv1_subsystem_path) {
432433
"/user.slice/user-1000.slice/[email protected]", // cgroup_path
433434
"/sys/fs/cgroup/mem" // expected_path
434435
};
435-
int length = 2;
436+
TestCase container_moving_cgroup = {
437+
"/sys/fs/cgroup/cpu,cpuacct", // mount_path
438+
"/system.slice/garden.service/garden/good/2f57368b-0eda-4e52-64d8-af5c", // root_path
439+
"/system.slice/garden.service/garden/bad/2f57368b-0eda-4e52-64d8-af5c", // cgroup_path
440+
"/sys/fs/cgroup/cpu,cpuacct" // expected_path
441+
};
442+
int length = 3;
436443
TestCase* testCases[] = { &host,
437-
&container_engine };
444+
&container_engine,
445+
&container_moving_cgroup };
438446
for (int i = 0; i < length; i++) {
439447
CgroupV1Controller* ctrl = new CgroupV1Controller( (char*)testCases[i]->root_path,
440448
(char*)testCases[i]->mount_path,
@@ -444,6 +452,72 @@ TEST(cgroupTest, set_cgroupv1_subsystem_path) {
444452
}
445453
}
446454

455+
TEST(cgroupTest, set_cgroupv1_subsystem_path_adjusted) {
456+
TestCase memory = {
457+
"/sys/fs/cgroup/memory", // mount_path
458+
"/", // root_path
459+
"../test1", // cgroup_path
460+
"/sys/fs/cgroup/memory" // expected_path
461+
};
462+
TestCase cpu = {
463+
"/sys/fs/cgroup/cpu", // mount_path
464+
"/", // root_path
465+
"../../test2", // cgroup_path
466+
"/sys/fs/cgroup/cpu" // expected_path
467+
};
468+
CgroupCpuController* ccc = new CgroupV1CpuController(CgroupV1Controller((char*)cpu.root_path,
469+
(char*)cpu.mount_path,
470+
true /* read-only mount */));
471+
ccc->set_subsystem_path((char*)cpu.cgroup_path);
472+
EXPECT_TRUE(ccc->needs_hierarchy_adjustment());
473+
474+
CgroupUtil::adjust_controller(ccc);
475+
ASSERT_STREQ(cpu.expected_path, ccc->subsystem_path());
476+
EXPECT_FALSE(ccc->needs_hierarchy_adjustment());
477+
478+
CgroupMemoryController* cmc = new CgroupV1MemoryController(CgroupV1Controller((char*)memory.root_path,
479+
(char*)memory.mount_path,
480+
true /* read-only mount */));
481+
cmc->set_subsystem_path((char*)memory.cgroup_path);
482+
EXPECT_TRUE(cmc->needs_hierarchy_adjustment());
483+
484+
CgroupUtil::adjust_controller(cmc);
485+
ASSERT_STREQ(memory.expected_path, cmc->subsystem_path());
486+
EXPECT_FALSE(cmc->needs_hierarchy_adjustment());
487+
}
488+
489+
TEST(cgroupTest, set_cgroupv2_subsystem_path_adjusted) {
490+
TestCase memory = {
491+
"/sys/fs/cgroup", // mount_path
492+
"/", // root_path
493+
"../test1", // cgroup_path
494+
"/sys/fs/cgroup" // expected_path
495+
};
496+
TestCase cpu = {
497+
"/sys/fs/cgroup", // mount_path
498+
"/", // root_path
499+
"../../test2", // cgroup_path
500+
"/sys/fs/cgroup" // expected_path
501+
};
502+
CgroupCpuController* ccc = new CgroupV2CpuController(CgroupV2Controller((char*)cpu.mount_path,
503+
(char*)cpu.cgroup_path,
504+
true /* read-only mount */));
505+
EXPECT_TRUE(ccc->needs_hierarchy_adjustment());
506+
507+
CgroupUtil::adjust_controller(ccc);
508+
ASSERT_STREQ(cpu.expected_path, ccc->subsystem_path());
509+
EXPECT_FALSE(ccc->needs_hierarchy_adjustment());
510+
511+
CgroupMemoryController* cmc = new CgroupV2MemoryController(CgroupV2Controller((char*)memory.mount_path,
512+
(char*)memory.cgroup_path,
513+
true /* read-only mount */));
514+
EXPECT_TRUE(cmc->needs_hierarchy_adjustment());
515+
516+
CgroupUtil::adjust_controller(cmc);
517+
ASSERT_STREQ(memory.expected_path, cmc->subsystem_path());
518+
EXPECT_FALSE(cmc->needs_hierarchy_adjustment());
519+
}
520+
447521
TEST(cgroupTest, set_cgroupv2_subsystem_path) {
448522
TestCase at_mount_root = {
449523
"/sys/fs/cgroup", // mount_path

0 commit comments

Comments
 (0)