Skip to content

Commit f496e62

Browse files
alexhudspith authored and mihalicyn committed
proc: Fix swap handling for cgroups v2 (can_use_swap)
On cgroups v2, there are no swap current/max files at the cgroup root, so can_use_swap must look lower in the hierarchy to determine if swap accounting is enabled. To also account for memory accounting being turned off at some level, walk the hierarchy upwards from lxcfs' own cgroup.

Signed-off-by: Alex Hudspith <[email protected]>
[ added check cgroup pointer is not NULL in lxcfs_init() ]
Signed-off-by: Alexander Mikhalitsyn <[email protected]>
1 parent b50a9a3 commit f496e62

File tree

4 files changed

+21
-27
lines changed

4 files changed

+21
-27
lines changed

src/bindings.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,7 @@ static void __attribute__((constructor)) lxcfs_init(void)
866866
{
867867
__do_close int init_ns = -EBADF, root_fd = -EBADF,
868868
pidfd = -EBADF;
869+
__do_free char *cgroup = NULL;
869870
int i = 0;
870871
pid_t pid;
871872
struct hierarchy *hierarchy;
@@ -920,7 +921,8 @@ static void __attribute__((constructor)) lxcfs_init(void)
920921
lxcfs_info("Kernel supports pidfds");
921922
}
922923

923-
can_use_swap = cgroup_ops->can_use_swap(cgroup_ops);
924+
cgroup = get_pid_cgroup(pid, "memory");
925+
can_use_swap = cgroup && cgroup_ops->can_use_swap(cgroup_ops, cgroup);
924926
if (can_use_swap)
925927
lxcfs_info("Kernel supports swap accounting");
926928
else

src/cgroups/cgfsng.c

+12-21
Original file line numberDiff line numberDiff line change
@@ -631,34 +631,25 @@ static int cgfsng_get_memory_slabinfo_fd(struct cgroup_ops *ops, const char *cgr
631631
return openat(h->fd, path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
632632
}
633633

634-
static bool cgfsng_can_use_swap(struct cgroup_ops *ops)
634+
static bool cgfsng_can_use_swap(struct cgroup_ops *ops, const char *cgroup)
635635
{
636-
bool has_swap = false;
636+
__do_free char *cgroup_rel = NULL, *junk_value = NULL;
637+
const char *file;
637638
struct hierarchy *h;
638639

639640
h = ops->get_hierarchy(ops, "memory");
640641
if (!h)
641642
return false;
642643

643-
if (is_unified_hierarchy(h)) {
644-
if (faccessat(h->fd, "memory.swap.max", F_OK, 0))
645-
return false;
646-
647-
if (faccessat(h->fd, "memory.swap.current", F_OK, 0))
648-
return false;
649-
650-
has_swap = true;
651-
} else {
652-
if (faccessat(h->fd, "memory.memsw.limit_in_bytes", F_OK, 0))
653-
return false;
654-
655-
if (faccessat(h->fd, "memory.memsw.usage_in_bytes", F_OK, 0))
656-
return false;
657-
658-
has_swap = true;
659-
}
660-
661-
return has_swap;
644+
cgroup_rel = must_make_path_relative(cgroup, NULL);
645+
file = is_unified_hierarchy(h) ? "memory.swap.current" : "memory.memsw.usage_in_bytes";
646+
/* For v2, we need to look at the lower levels of the hierarchy because
647+
* no 'memory.swap.current' file exists at the root. We must search
648+
* upwards in the hierarchy in case memory accounting is disabled via
649+
* cgroup.subtree_control for the given cgroup itself.
650+
*/
651+
int ret = cgroup_walkup_to_root(ops->cgroup2_root_fd, h->fd, cgroup_rel, file, &junk_value);
652+
return ret == 0;
662653
}
663654

664655
static int cgfsng_get_memory_stats(struct cgroup_ops *ops, const char *cgroup,

src/cgroups/cgroup.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ struct cgroup_ops {
148148
char **value);
149149
int (*get_memory_slabinfo_fd)(struct cgroup_ops *ops,
150150
const char *cgroup);
151-
bool (*can_use_swap)(struct cgroup_ops *ops);
151+
bool (*can_use_swap)(struct cgroup_ops *ops, const char *cgroup);
152152

153153
/* cpuset */
154154
int (*get_cpuset_cpus)(struct cgroup_ops *ops, const char *cgroup,

src/proc_fuse.c

+5-4
Original file line numberDiff line numberDiff line change
@@ -459,11 +459,13 @@ static int proc_swaps_read(char *buf, size_t size, off_t offset,
459459
}
460460

461461
if (wants_swap) {
462-
/* The total amount of swap is always reported to be the
462+
/* For cgroups v1, the total amount of swap is always reported to be the
463463
lesser of the RAM+SWAP limit or the SWAP device size.
464464
This is because the kernel can swap as much as it
465465
wants and not only up to swtotal. */
466-
swtotal = memlimit / 1024 + swtotal;
466+
if (!liblxcfs_memory_is_cgroupv2())
467+
swtotal = memlimit / 1024 + swtotal;
468+
467469
if (hostswtotal < swtotal) {
468470
swtotal = hostswtotal;
469471
}
@@ -1359,11 +1361,10 @@ static int proc_meminfo_read(char *buf, size_t size, off_t offset,
13591361

13601362
sscanf(line + STRLITERALLEN("SwapTotal:"), "%" PRIu64, &hostswtotal);
13611363

1362-
/* The total amount of swap is always reported to be the
1364+
/* In cgroups v1, the total amount of swap is always reported to be the
13631365
lesser of the RAM+SWAP limit or the SWAP device size.
13641366
This is because the kernel can swap as much as it
13651367
wants and not only up to swtotal. */
1366-
13671368
if (!liblxcfs_memory_is_cgroupv2())
13681369
swtotal += memlimit;
13691370

0 commit comments

Comments
 (0)