diff --git a/Cargo.lock b/Cargo.lock index 5d1d5e220..7056a772b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -190,6 +190,16 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "caps" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190baaad529bcfbde9e1a19022c42781bdb6ff9de25721abdb8fd98c0807730b" +dependencies = [ + "libc", + "thiserror", +] + [[package]] name = "cc" version = "1.2.1" @@ -348,6 +358,7 @@ version = "0.1.0" dependencies = [ "arch", "bitflags 1.3.2", + "caps", "crossbeam-channel", "env_logger", "hvf", diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 7e097f57e..f703da64f 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -42,3 +42,4 @@ lru = ">=0.9" [target.'cfg(target_os = "linux")'.dependencies] rutabaga_gfx = { path = "../rutabaga_gfx", features = ["x"], optional = true } +caps = "0.5.5" diff --git a/src/devices/src/virtio/fs/linux/passthrough.rs b/src/devices/src/virtio/fs/linux/passthrough.rs index fdc618c61..3cace4433 100644 --- a/src/devices/src/virtio/fs/linux/passthrough.rs +++ b/src/devices/src/virtio/fs/linux/passthrough.rs @@ -15,6 +15,7 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::{Arc, RwLock}; use std::time::Duration; +use caps::{has_cap, CapSet, Capability}; use nix::request_code_read; use vm_memory::ByteValued; @@ -78,11 +79,6 @@ macro_rules! scoped_cred { // Changes the effective uid/gid of the current thread to `val`. Changes // the thread's credentials back to root when the returned struct is dropped. fn new(val: $ty) -> io::Result> { - if val == 0 { - // Nothing to do since we are already uid 0. - return Ok(None); - } - // We want credential changes to be per-thread because otherwise // we might interfere with operations being carried out on other // threads with different uids/gids. However, posix requires that @@ -122,15 +118,6 @@ macro_rules! scoped_cred { scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid); scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid); -fn set_creds( - uid: libc::uid_t, - gid: libc::gid_t, -) -> io::Result<(Option, Option)> { - // We have to change the gid before we change the uid because if we change the uid first then we - // lose the capability to change the gid. However changing back can happen in any order. - ScopedGid::new(gid).and_then(|gid| Ok((ScopedUid::new(uid)?, gid))) -} - fn ebadf() -> io::Error { io::Error::from_raw_os_error(libc::EBADF) } @@ -351,6 +338,9 @@ pub struct PassthroughFs { // `cfg.writeback` is true and `init` was called with `FsOptions::WRITEBACK_CACHE`. writeback: AtomicBool, announce_submounts: AtomicBool, + my_uid: Option, + my_gid: Option, + cap_fowner: bool, cfg: Config, } @@ -385,6 +375,25 @@ impl PassthroughFs { fd }; + let my_uid = if has_cap(None, CapSet::Effective, Capability::CAP_SETUID).unwrap_or_default() + { + None + } else { + // SAFETY: This syscall is always safe to call and always succeeds. + Some(unsafe { libc::getuid() }) + }; + + let my_gid = if has_cap(None, CapSet::Effective, Capability::CAP_SETGID).unwrap_or_default() + { + None + } else { + // SAFETY: This syscall is always safe to call and always succeeds. + Some(unsafe { libc::getgid() }) + }; + + let cap_fowner = + has_cap(None, CapSet::Effective, Capability::CAP_FOWNER).unwrap_or_default(); + // Safe because we just opened this fd or it was provided by our caller. let proc_self_fd = unsafe { File::from_raw_fd(fd) }; @@ -401,6 +410,9 @@ impl PassthroughFs { writeback: AtomicBool::new(false), announce_submounts: AtomicBool::new(false), + my_uid, + my_gid, + cap_fowner, cfg, }) } @@ -669,8 +681,15 @@ impl PassthroughFs { Ok(()) } - fn do_open(&self, inode: Inode, flags: u32) -> io::Result<(Option, OpenOptions)> { + fn do_open(&self, inode: Inode, mut flags: u32) -> io::Result<(Option, OpenOptions)> { debug!("do_open: {:?}", inode); + if !self.cap_fowner { + // O_NOATIME can only be used with CAP_FOWNER or if we are the file + // owner. Not worth checking the latter, just drop it if we don't + // have the cap. This makes overlayfs mounts with virtiofs lower dirs + // work. + flags &= !(libc::O_NOATIME as u32); + } let file = RwLock::new(self.open_inode(inode, flags as i32)?); let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); @@ -758,6 +777,37 @@ impl PassthroughFs { Err(io::Error::last_os_error()) } } + + fn set_creds( + &self, + uid: libc::uid_t, + gid: libc::gid_t, + ) -> io::Result<(Option, Option)> { + // Change the gid first, since once we change the uid we lose the capability to change the gid. + let scoped_gid = if gid == 0 || self.my_gid == Some(gid) { + // Always allow "root" accesses even if we don't have root powers. + // This means guest processes running as root can use /tmp (though + // the files will not be actually owned by root), which is desirable. + None + } else if self.my_gid.is_some() { + // Reject writes as any other gid if we do not have setgid + // privileges. + return Err(io::Error::from_raw_os_error(libc::EPERM)); + } else { + ScopedGid::new(gid)? + }; + + // Same logic as above, for uid. + let scoped_uid = if uid == 0 || self.my_uid == Some(uid) { + None + } else if self.my_uid.is_some() { + return Err(io::Error::from_raw_os_error(libc::EPERM)); + } else { + ScopedUid::new(uid)? + }; + + Ok((scoped_uid, scoped_gid)) + } } fn forget_one( @@ -957,7 +1007,7 @@ impl FileSystem for PassthroughFs { unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); } - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; + let (_uid, _gid) = self.set_creds(ctx.uid, ctx.gid)?; let data = self .inodes .read() @@ -1060,7 +1110,7 @@ impl FileSystem for PassthroughFs { unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); } - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; + let (_uid, _gid) = self.set_creds(ctx.uid, ctx.gid)?; let data = self .inodes .read() @@ -1167,7 +1217,7 @@ impl FileSystem for PassthroughFs { if kill_priv { // We need to change credentials during a write so that the kernel will remove setuid // or setgid bits from the file if it was written to by someone other than the owner. - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; + let (_uid, _gid) = self.set_creds(ctx.uid, ctx.gid)?; } let data = self @@ -1395,7 +1445,7 @@ impl FileSystem for PassthroughFs { unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); } - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; + let (_uid, _gid) = self.set_creds(ctx.uid, ctx.gid)?; let data = self .inodes .read() @@ -1476,7 +1526,7 @@ impl FileSystem for PassthroughFs { unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); } - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; + let (_uid, _gid) = self.set_creds(ctx.uid, ctx.gid)?; let data = self .inodes .read()