Skip to content

std: Force Instant::now() to be monotonic #56988

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 2 additions & 15 deletions src/librustc/util/profiling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use session::config::Options;

use std::fs;
use std::io::{self, StderrLock, Write};
use std::time::{Duration, Instant};
use std::time::Instant;

macro_rules! define_categories {
($($name:ident,)*) => {
Expand Down Expand Up @@ -203,20 +203,7 @@ impl SelfProfiler {
}

fn stop_timer(&mut self) -> u64 {
let elapsed = if cfg!(windows) {
// On Windows, timers don't always appear to be monotonic (see #51648)
// which can lead to panics when calculating elapsed time.
// Work around this by testing to see if the current time is less than
// our recorded time, and if it is, just returning 0.
let now = Instant::now();
if self.current_timer >= now {
Duration::new(0, 0)
} else {
self.current_timer.elapsed()
}
} else {
self.current_timer.elapsed()
};
let elapsed = self.current_timer.elapsed();

self.current_timer = Instant::now();

Expand Down
8 changes: 8 additions & 0 deletions src/libstd/sys/cloudabi/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ impl Instant {
}
}

pub fn actually_monotonic() -> bool {
true
}

pub const fn zero() -> Instant {
Instant { t: 0 }
}

pub fn sub_instant(&self, other: &Instant) -> Duration {
let diff = self.t
.checked_sub(other.t)
Expand Down
8 changes: 8 additions & 0 deletions src/libstd/sys/redox/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@ impl Instant {
Instant { t: now(syscall::CLOCK_MONOTONIC) }
}

pub const fn zero() -> Instant {
Instant { t: Timespec { t: syscall::TimeSpec { tv_sec: 0, tv_nsec: 0 } } }
}

pub fn actually_monotonic() -> bool {
false
}

pub fn sub_instant(&self, other: &Instant) -> Duration {
self.t.sub_timespec(&other.t).unwrap_or_else(|_| {
panic!("specified instant was later than self")
Expand Down
40 changes: 28 additions & 12 deletions src/libstd/sys/unix/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ struct Timespec {
}

impl Timespec {
const fn zero() -> Timespec {
Timespec {
t: libc::timespec { tv_sec: 0, tv_nsec: 0 },
}
}

fn sub_timespec(&self, other: &Timespec) -> Result<Duration, Duration> {
if self >= other {
Ok(if self.t.tv_nsec >= other.t.tv_nsec {
Expand Down Expand Up @@ -128,19 +134,22 @@ mod inner {
}

pub const UNIX_EPOCH: SystemTime = SystemTime {
t: Timespec {
t: libc::timespec {
tv_sec: 0,
tv_nsec: 0,
},
},
t: Timespec::zero(),
};

impl Instant {
pub fn now() -> Instant {
Instant { t: unsafe { libc::mach_absolute_time() } }
}

pub const fn zero() -> Instant {
Instant { t: 0 }
}

pub fn actually_monotonic() -> bool {
true
}

pub fn sub_instant(&self, other: &Instant) -> Duration {
let info = info();
let diff = self.t.checked_sub(other.t)
Expand Down Expand Up @@ -258,19 +267,26 @@ mod inner {
}

pub const UNIX_EPOCH: SystemTime = SystemTime {
t: Timespec {
t: libc::timespec {
tv_sec: 0,
tv_nsec: 0,
},
},
t: Timespec::zero(),
};

impl Instant {
pub fn now() -> Instant {
Instant { t: now(libc::CLOCK_MONOTONIC) }
}

pub const fn zero() -> Instant {
Instant {
t: Timespec::zero(),
}
}

pub fn actually_monotonic() -> bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@edelsonh Can you confirm ppc/ppc64 has a reliable monotonic clock?

(cfg!(target_os = "linux") && cfg!(target_arch = "x86_64")) ||
(cfg!(target_os = "linux") && cfg!(target_arch = "x86")) ||
false // last clause, used so `||` is always trailing above
}

pub fn sub_instant(&self, other: &Instant) -> Duration {
self.t.sub_timespec(&other.t).unwrap_or_else(|_| {
panic!("specified instant was later than self")
Expand Down
8 changes: 8 additions & 0 deletions src/libstd/sys/wasm/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ impl Instant {
Instant(TimeSysCall::perform(TimeClock::Monotonic))
}

pub const fn zero() -> Instant {
Instant(Duration::from_secs(0))
}

pub fn actually_monotonic() -> bool {
false
}

pub fn sub_instant(&self, other: &Instant) -> Duration {
self.0 - other.0
}
Expand Down
8 changes: 8 additions & 0 deletions src/libstd/sys/windows/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ impl Instant {
t
}

pub fn actually_monotonic() -> bool {
false
}

pub const fn zero() -> Instant {
Instant { t: 0 }
}

pub fn sub_instant(&self, other: &Instant) -> Duration {
// Values which are +- 1 need to be considered as basically the same
// units in time due to various measurement oddities, according to
Expand Down
42 changes: 41 additions & 1 deletion src/libstd/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@

#![stable(feature = "time", since = "1.3.0")]

use cmp;
use error::Error;
use fmt;
use ops::{Add, Sub, AddAssign, SubAssign};
use sys::time;
use sys_common::FromInner;
use sys_common::mutex::Mutex;

#[stable(feature = "time", since = "1.3.0")]
pub use core::time::Duration;
Expand Down Expand Up @@ -150,7 +152,45 @@ impl Instant {
/// ```
#[stable(feature = "time2", since = "1.8.0")]
pub fn now() -> Instant {
Instant(time::Instant::now())
let os_now = time::Instant::now();

// And here we come upon a sad state of affairs. The whole point of
// `Instant` is that it's monotonically increasing. We've found in the
// wild, however, that it's not actually monotonically increasing for
// one reason or another. These appear to be OS and hardware level bugs,
// and there's not really a whole lot we can do about them. Here's a
// taste of what we've found:
//
// * #48514 - OpenBSD, x86_64
// * #49281 - linux arm64 and s390x
// * #51648 - windows, x86
// * #56560 - windows, x86_64, AWS
// * #56612 - windows, x86, vm (?)
// * #56940 - linux, arm64
// * https://bugzilla.mozilla.org/show_bug.cgi?id=1487778 - a similar
// Firefox bug
//
// It simply seems that this it just happens so that a lot in the wild
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Delete 'this'?

// we're seeing panics across various platforms where consecutive calls
// to `Instant::now`, such as via the `elapsed` function, are panicking
// as they're going backwards. Placed here is a last-ditch effort to try
// to fix things up. We keep a global "latest now" instance which is
// returned instead of what the OS says if the OS goes backwards.
//
// To hopefully mitigate the impact of this though a few platforms are
// whitelisted as "these at least haven't gone backwards yet".
if time::Instant::actually_monotonic() {
return Instant(os_now)
}

static LOCK: Mutex = Mutex::new();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have dead-code elimination in MIR debug builds ? Otherwise LLVM-IR for this code will always be emitted independently of the result of actually_monotonic, and whether that code will end up generating machine code will depend on the optimization level.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't, now, but actually_monotonic is a function that'll be trivially inlined so LLVM will optimize this away

static mut LAST_NOW: time::Instant = time::Instant::zero();
unsafe {
let _lock = LOCK.lock();
let now = cmp::max(LAST_NOW, os_now);
LAST_NOW = now;
Instant(now)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A mutex seems a bit heavy-handed. Many uses of rdtsc (where available) are for minimal-overhead, thread-local timing of functions.
Wouldn't rdtsc + some atomic ops that prevent things from going backwards be much lighter than potential thread suspension?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that a full-on mutex is quite a heavy hammer for this use case! I wasn't sure though how to best to minimize the cost here.

The Windows documentation at least "strongly discourages" rdtsc for handling VM migration issues as well as some supposed hardware. If that's the case I think we probably want to avoid that?

I figured it'd probably be best to start from a conservative position and we can always come in later as necessary and try to use atomics and/or different tricks.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't mean to use rdtsc directly. We can still defer to QueryPerformanceCounter/clock_gettime, i.e. the current Instant implementation which in the end boils down to rdtsc on many x86 systems.
Just tack on a sanity check/correction with atomics instead of a mutex.

I'm mostly concerned that thread-contention might unexpectedly hit people if they litter their code with instants because it used to be fast. I have no concrete examples, just experience with gratuitous use of timing functions that were fast on linux suddenly making an application slow on windows because it decided to not use rdtsc.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I'm all for adding a more lightweight implementation, do you have one in mind? Instant has a varying size across platforms, which makes it difficult to select an appropriate atomic and/or more lightweight method

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had sizeof checks, some type punning and AtomicU128/U64 in mind. Beyond that it would be your standard read, check, CAS. similar to what you're now doing in the lock's critical section, except in a loop.

The mutex would still be needed as fallback if the checks don't work out.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok, that's what I thought, and yeah my worry about that is that it wouldn't solve the original problem of monotonic clocks going backwards, so I'm afraid we'd still end up a the solution proposed in this PR.

We, as far as I know, don't have a great handle on how big the errors are.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have thought some more about optimization potential

  1. we can use relaxed atomics everywhere. Justification: One thread cannot observe another thread's Instants without some external synchronization happening, e.g. other ordered loads and stores. So until those happen only intra-thread ordering is relevant, Relaxed is sufficient for that.
  2. in the good case we only need to do a test and return from the perspective of the main sequence of instructions. There's no dependency on the writes to global state happening, so this should be friendly to instruction parallelism.
  3. we can limit the XCHG loop by bailing out early if it fails because another thread updated it to a larger value than we are trying. it doesn't prevent the cache-line from bouncing around but at least can allow multiple threads to make progress simultaneously.

It could approximately look like this:

static mut LAST_NOW: AtomicU128 = 0.into();
let last_now = LAST_NOW.load(Relaxed);  // insert type punning here
let os_now = time::Instant::now();
if likely(os_now > last_now) {
  loop {
    match LAST_NOW.compare_exchange_weak(last_now, os_now, Relaxed, Relaxed) {
      Ok(_) => break,
      Err(x) if x >= os_now => break, // some other thread is ahead of us, no need to update
      _ => {}
    }
  return os_now;
}
return last_now

It's a bit smaller hammer but still not the rubber kind. To soften it further we either need a I (don't) care about broken systems switch somewhere or use better platform detection.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@the8472 yes that's all possible but 128-bit atomics are only available on a few platforms, so we can't use them generally.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok, that's what I thought, and yeah my worry about that is that it wouldn't solve the original problem of monotonic clocks going backwards, so I'm afraid we'd still end up a the solution proposed in this PR.

Wouldn't it still make sense to try the cheaper thread-local version first and switch to a full lock if it does turn out to be insufficient? If we directly go to the lock, then we will not be able to determine whether a cheaper thread-local variant would also have been sufficient. @the8472's theory that this arises only due to migration between cores at least sounds very plausible, so I think it would be worthwhile to try this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nikic I think it's incorrect to avoid the full lock though? If the time is less than a thread-local version than you definitely have to acquire the lock, but even if it's greater than a thread local version you need to check the lock for the global one as well. Right now the bug primarily happens on one thread, but the documented guarantees of this API are that it works across all threads

}

/// Returns the amount of time elapsed from another instant to this one.
Expand Down