@@ -34,7 +34,6 @@ type mountConfig struct {
34
34
cgroup2Path string
35
35
rootlessCgroups bool
36
36
cgroupns bool
37
- noMountFallback bool
38
37
}
39
38
40
39
// mountEntry contains mount data specific to a mount point.
@@ -83,7 +82,6 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig, mountFds mountFds) (er
83
82
cgroup2Path : iConfig .Cgroup2Path ,
84
83
rootlessCgroups : iConfig .RootlessCgroups ,
85
84
cgroupns : config .Namespaces .Contains (configs .NEWCGROUP ),
86
- noMountFallback : config .NoMountFallback ,
87
85
}
88
86
for i , m := range config .Mounts {
89
87
entry := mountEntry {Mount : m }
@@ -409,6 +407,51 @@ func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) {
409
407
})
410
408
}
411
409
410
+ const (
411
+ // The atime "enum" flags (which are mutually exclusive).
412
+ mntAtimeEnumFlags = unix .MS_NOATIME | unix .MS_RELATIME | unix .MS_STRICTATIME
413
+ // All atime-related flags.
414
+ mntAtimeFlags = mntAtimeEnumFlags | unix .MS_NODIRATIME
415
+ // Flags which can be locked when inheriting mounts in a different userns.
416
+ // In the kernel, these are the mounts that are locked using MNT_LOCK_*.
417
+ mntLockFlags = unix .MS_RDONLY | unix .MS_NODEV | unix .MS_NOEXEC |
418
+ unix .MS_NOSUID | mntAtimeFlags
419
+ )
420
+
421
+ func statfsToMountFlags (st unix.Statfs_t ) int {
422
+ // From <linux/statfs.h>.
423
+ const ST_NOSYMFOLLOW = 0x2000 //nolint:revive
424
+
425
+ var flags int
426
+ for _ , f := range []struct {
427
+ st , ms int
428
+ }{
429
+ // See calculate_f_flags() in fs/statfs.c.
430
+ {unix .ST_RDONLY , unix .MS_RDONLY },
431
+ {unix .ST_NOSUID , unix .MS_NOSUID },
432
+ {unix .ST_NODEV , unix .MS_NODEV },
433
+ {unix .ST_NOEXEC , unix .MS_NOEXEC },
434
+ {unix .ST_MANDLOCK , unix .MS_MANDLOCK },
435
+ {unix .ST_SYNCHRONOUS , unix .MS_SYNCHRONOUS },
436
+ {unix .ST_NOATIME , unix .MS_NOATIME },
437
+ {unix .ST_NODIRATIME , unix .MS_NODIRATIME },
438
+ {unix .ST_RELATIME , unix .MS_RELATIME },
439
+ {ST_NOSYMFOLLOW , unix .MS_NOSYMFOLLOW },
440
+ // There is no ST_STRICTATIME -- see below.
441
+ } {
442
+ if int (st .Flags )& f .st == f .st {
443
+ flags |= f .ms
444
+ }
445
+ }
446
+ // MS_STRICTATIME is a "fake" MS_* flag. It isn't stored in mnt->mnt_flags,
447
+ // and so it doesn't show up in statfs(2). If none of the other flags in
448
+ // atime enum are present, the mount is MS_STRICTATIME.
449
+ if flags & mntAtimeEnumFlags == 0 {
450
+ flags |= unix .MS_STRICTATIME
451
+ }
452
+ return flags
453
+ }
454
+
412
455
func mountToRootfs (c * mountConfig , m mountEntry ) error {
413
456
rootfs := c .root
414
457
@@ -509,11 +552,97 @@ func mountToRootfs(c *mountConfig, m mountEntry) error {
509
552
return err
510
553
}
511
554
}
512
- // bind mount won't change mount options, we need remount to make mount options effective.
513
- // first check that we have non-default options required before attempting a remount
514
- if m .Flags &^(unix .MS_REC | unix .MS_REMOUNT | unix .MS_BIND ) != 0 {
515
- // only remount if unique mount options are set
516
- if err := remount (m , rootfs , c .noMountFallback ); err != nil {
555
+
556
+ // The initial MS_BIND won't change the mount options, we need to do a
557
+ // separate MS_BIND|MS_REMOUNT to apply the mount options. We skip
558
+ // doing this if the user has not specified any mount flags at all
559
+ // (including cleared flags) -- in which case we just keep the original
560
+ // mount flags.
561
+ //
562
+ // Note that the fact we check whether any clearing flags are set is in
563
+ // contrast to mount(8)'s current behaviour, but is what users probably
564
+ // expect. See <https://github.com/util-linux/util-linux/issues/2433>.
565
+ if m .Flags & ^ (unix .MS_BIND | unix .MS_REC | unix .MS_REMOUNT ) != 0 || m .ClearedFlags != 0 {
566
+ if err := utils .WithProcfd (rootfs , m .Destination , func (dstFD string ) error {
567
+ flags := m .Flags | unix .MS_BIND | unix .MS_REMOUNT
568
+ // The runtime-spec says we SHOULD map to the relevant mount(8)
569
+ // behaviour. However, it's not clear whether we want the
570
+ // "mount --bind -o ..." or "mount --bind -o remount,..."
571
+ // behaviour here -- both of which are somewhat broken[1].
572
+ //
573
+ // So, if the user has passed "remount" as a mount option, we
574
+ // implement the "mount --bind -o remount" behaviour, otherwise
575
+ // we implement the spiritual intent of the "mount --bind -o"
576
+ // behaviour, which should match what users expect. Maybe
577
+ // mount(8) will eventually implement this behaviour too..
578
+ //
579
+ // [1]: https://github.com/util-linux/util-linux/issues/2433
580
+
581
+ // Initially, we emulate "mount --bind -o ..." where we set
582
+ // only the requested flags (clearing any existing flags). The
583
+ // only difference from mount(8) is that we do this
584
+ // unconditionally, regardless of whether any set-me mount
585
+ // options have been requested.
586
+ //
587
+ // TODO: We are not doing any special handling of the atime
588
+ // flags here, which means that the mount will inherit the old
589
+ // atime flags if the user didn't explicitly request a
590
+ // different set of flags. This also has the mount(8) bug where
591
+ // "nodiratime,norelatime" will result in a
592
+ // "nodiratime,relatime" mount.
593
+ mountErr := mountViaFDs ("" , nil , m .Destination , dstFD , "" , uintptr (flags ), "" )
594
+ if mountErr == nil {
595
+ return nil
596
+ }
597
+
598
+ // If the mount failed, the mount may contain locked mount
599
+ // flags. In that case, we emulate "mount --bind -o
600
+ // remount,...", where we take the existing mount flags of the
601
+ // mount and apply the requested flags (including clearing flags)
602
+ // on top. The main divergence we have from mount(8) here is
603
+ // that we handle atimes correctly to make sure we error out if
604
+ // we cannot fulfil the requested mount flags.
605
+
606
+ var st unix.Statfs_t
607
+ if err := unix .Statfs (m .src (), & st ); err != nil {
608
+ return & os.PathError {Op : "statfs" , Path : m .src (), Err : err }
609
+ }
610
+ srcFlags := statfsToMountFlags (st )
611
+ // If the user explicitly requests one of the locked flags *not*
612
+ // be set, we need to return an error to avoid producing mounts
613
+ // that don't match the user's request.
614
+ if srcFlags & m .ClearedFlags & mntLockFlags != 0 {
615
+ return mountErr
616
+ }
617
+
618
+ // If an MS_*ATIME flag was requested, it must match the
619
+ // existing one. This handles two separate kernel bugs, and
620
+ // matches the logic of can_change_locked_flags() but without
621
+ // these bugs:
622
+ //
623
+ // * (2.6.30+) Since commit 613cbe3d4870 ("Don't set relatime
624
+ // when noatime is specified"), MS_RELATIME is ignored when
625
+ // MS_NOATIME is set. This means that us inheriting MS_NOATIME
626
+ // from a mount while requesting MS_RELATIME would *silently*
627
+ // produce an MS_NOATIME mount.
628
+ //
629
+ // * (2.6.30+) Since its introduction in commit d0adde574b84
630
+ // ("Add a strictatime mount option"), MS_STRICTATIME has
631
+ // caused any passed MS_RELATIME and MS_NOATIME flags to be
632
+ // ignored which results in us *silently* producing
633
+ // MS_STRICTATIME mounts even if the user requested MS_RELATIME
634
+ // or MS_NOATIME.
635
+ if m .Flags & mntAtimeFlags != 0 && m .Flags & mntAtimeFlags != srcFlags & mntAtimeFlags {
636
+ return mountErr
637
+ }
638
+
639
+ // Retry the mount with the existing lockable mount flags
640
+ // applied.
641
+ flags |= srcFlags & mntLockFlags
642
+ mountErr = mountViaFDs ("" , nil , m .Destination , dstFD , "" , uintptr (flags ), "" )
643
+ logrus .Debugf ("remount retry: srcFlags=0x%x flagsSet=0x%x flagsClr=0x%x: %v" , srcFlags , m .Flags , m .ClearedFlags , mountErr )
644
+ return mountErr
645
+ }); err != nil {
517
646
return err
518
647
}
519
648
}
@@ -1103,37 +1232,6 @@ func writeSystemProperty(key, value string) error {
1103
1232
return os .WriteFile (path .Join ("/proc/sys" , keyPath ), []byte (value ), 0o644 )
1104
1233
}
1105
1234
1106
- func remount (m mountEntry , rootfs string , noMountFallback bool ) error {
1107
- return utils .WithProcfd (rootfs , m .Destination , func (dstFD string ) error {
1108
- flags := uintptr (m .Flags | unix .MS_REMOUNT )
1109
- err := mountViaFDs ("" , nil , m .Destination , dstFD , m .Device , flags , "" )
1110
- if err == nil {
1111
- return nil
1112
- }
1113
- // Check if the source has flags set according to noMountFallback
1114
- src := m .src ()
1115
- var s unix.Statfs_t
1116
- if err := unix .Statfs (src , & s ); err != nil {
1117
- return & os.PathError {Op : "statfs" , Path : src , Err : err }
1118
- }
1119
- var checkflags int
1120
- if noMountFallback {
1121
- // Check for ro only
1122
- checkflags = unix .MS_RDONLY
1123
- } else {
1124
- // Check for ro, nodev, noexec, nosuid, noatime, relatime, strictatime,
1125
- // nodiratime
1126
- checkflags = unix .MS_RDONLY | unix .MS_NODEV | unix .MS_NOEXEC | unix .MS_NOSUID | unix .MS_NOATIME | unix .MS_RELATIME | unix .MS_STRICTATIME | unix .MS_NODIRATIME
1127
- }
1128
- if int (s .Flags )& checkflags == 0 {
1129
- return err
1130
- }
1131
- // ... and retry the mount with flags found above.
1132
- flags |= uintptr (int (s .Flags ) & checkflags )
1133
- return mountViaFDs ("" , nil , m .Destination , dstFD , m .Device , flags , "" )
1134
- })
1135
- }
1136
-
1137
1235
// Do the mount operation followed by additional mounts required to take care
1138
1236
// of propagation flags. This will always be scoped inside the container rootfs.
1139
1237
func mountPropagate (m mountEntry , rootfs string , mountLabel string ) error {
0 commit comments