Skip to content

Fix some videos having offsetted (incorrect) timestamps #8029

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Nov 8, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ table VideoFrameReference (
///
/// Note that this uses the closest video frame instead of the latest at this timestamp
/// in order to be more forgiving of rounding errors for imprecise timestamp types.
///
/// Timestamps are relative to the start of the video, i.e. a timestamp of 0 always corresponds to the first frame.
/// This is oftentimes equivalent to presentation timestamps (known as PTS), but in the presence of B-frames
/// (bidirectionally predicted frames) there may be an offset on the first presentation timestamp in the video.
timestamp: rerun.components.VideoTimestamp ("attr.rerun.component_required", required, order: 1000);

// --- Optional ---
Expand Down
4 changes: 4 additions & 0 deletions crates/store/re_types/src/archetypes/video_frame_reference.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/store/re_video/src/decode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ impl Default for FrameInfo {

impl FrameInfo {
/// Presentation timestamp range in which this frame is valid.
pub fn time_range(&self) -> std::ops::Range<Time> {
pub fn presentation_time_range(&self) -> std::ops::Range<Time> {
self.presentation_timestamp..self.presentation_timestamp + self.duration
}
}
Expand Down
57 changes: 55 additions & 2 deletions crates/store/re_video/src/demux/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,57 @@
/// and should be presented in composition-timestamp order.
pub samples: Vec<Sample>,

/// Meta information about the samples.
pub sample_statistics: SampleStatistics,

/// All the tracks in the mp4; not just the video track.
///
/// Can be nice to show in a UI.
pub mp4_tracks: BTreeMap<TrackId, Option<TrackKind>>,
}

/// Meta information about the video samples.
#[derive(Clone, Debug)]
pub struct SampleStatistics {
/// The smallest presentation timestamp observed in this video.
///
/// This is typically 0, but in the presence of B-frames, it may be non-zero.
/// In fact, many formats don't require this to be zero, but video players typically
/// normalize the shown time to start at zero.
/// Note that timestamps in the [`Sample`]s are *not* automatically adjusted with this value.
// This is roughly equivalent to FFmpeg's internal `min_corrected_pts`
// https://github.com/FFmpeg/FFmpeg/blob/4047b887fc44b110bccb1da09bcb79d6e454b88b/libavformat/isom.h#L202
// (unlike us, this handles a bunch more edge cases but it fulfills the same role)

Check warning on line 83 in crates/store/re_video/src/demux/mod.rs

View workflow job for this annotation

GitHub Actions / Checks / Spell Check

"fullfills" should be "fulfills".
// To learn more about this I recommend reading the patch that introduced this in FFmpeg:
// https://patchwork.ffmpeg.org/project/ffmpeg/patch/[email protected]/#12592
pub minimum_presentation_timestamp: Time,

/// Whether all decode timestamps are equal to presentation timestamps.
///
/// If true, the video typically has no B-frames as those require frame reordering.
pub dts_always_equal_pts: bool,
}

impl SampleStatistics {
    /// Gathers statistics over the given `samples`.
    ///
    /// * `minimum_presentation_timestamp` is the smallest presentation timestamp found in
    ///   `samples`, or the default `Time` value if `samples` is empty.
    /// * `dts_always_equal_pts` is `true` if every sample's decode timestamp equals its
    ///   presentation timestamp (which is trivially the case for an empty slice).
    pub fn new(samples: &[Sample]) -> Self {
        re_tracing::profile_function!();

        // Both statistics are independent of each other; each needs a single pass over the samples.
        let dts_always_equal_pts = samples
            .iter()
            .all(|sample| sample.decode_timestamp == sample.presentation_timestamp);

        let smallest_pts = samples
            .iter()
            .map(|sample| sample.presentation_timestamp)
            .min();

        Self {
            // No samples means no observed timestamp, fall back to the default value.
            minimum_presentation_timestamp: smallest_pts.unwrap_or_default(),
            dts_always_equal_pts,
        }
    }
}

impl VideoData {
/// Loads a video from the given data.
///
Expand Down Expand Up @@ -229,17 +274,25 @@
}
}

/// Determines the presentation timestamps of all frames inside a video, returning raw time values.
/// Determines the video timestamps of all frames inside a video, returning raw time values.
///
/// Returned timestamps are in nanoseconds since start and are guaranteed to be monotonically increasing.
/// These are *not* necessarily the same as the presentation timestamps, as the returned timestamps are
/// normalized with respect to the start of the video, see [`Self::minimum_presentation_timestamp`].
pub fn frame_timestamps_ns(&self) -> impl Iterator<Item = i64> + '_ {
re_tracing::profile_function!();

// Segments are guaranteed to be sorted among each other, but within a segment,
// presentation timestamps may not be sorted since this is sorted by decode timestamps.
self.gops.iter().flat_map(|seg| {
self.samples[seg.range()]
.iter()
.map(|sample| sample.presentation_timestamp.into_nanos(self.timescale))
.map(|sample| sample.presentation_timestamp)
.sorted()
.map(|pts| {
(pts - self.sample_statistics.minimum_presentation_timestamp)
.into_nanos(self.timescale)
})
})
}
}
Expand Down
57 changes: 32 additions & 25 deletions crates/store/re_video/src/demux/mp4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use super::{Config, GroupOfPictures, Sample, VideoData, VideoLoadError};

use crate::{Time, Timescale};
use crate::{demux::SampleStatistics, Time, Timescale};

impl VideoData {
pub fn load_mp4(bytes: &[u8]) -> Result<Self, VideoLoadError> {
Expand Down Expand Up @@ -42,32 +42,36 @@ impl VideoData {
let mut gops = Vec::<GroupOfPictures>::new();
let mut gop_sample_start_index = 0;

for sample in &track.samples {
if sample.is_sync && !samples.is_empty() {
let start = samples[gop_sample_start_index].decode_timestamp;
let sample_range = gop_sample_start_index as u32..samples.len() as u32;
gops.push(GroupOfPictures {
start,
sample_range,
{
re_tracing::profile_scope!("copy samples & build gops");

for sample in &track.samples {
if sample.is_sync && !samples.is_empty() {
let start = samples[gop_sample_start_index].decode_timestamp;
let sample_range = gop_sample_start_index as u32..samples.len() as u32;
gops.push(GroupOfPictures {
start,
sample_range,
});
gop_sample_start_index = samples.len();
}

let decode_timestamp = Time::new(sample.decode_timestamp as i64);
let presentation_timestamp = Time::new(sample.composition_timestamp as i64);
let duration = Time::new(sample.duration as i64);

let byte_offset = sample.offset as u32;
let byte_length = sample.size as u32;

samples.push(Sample {
is_sync: sample.is_sync,
decode_timestamp,
presentation_timestamp,
duration,
byte_offset,
byte_length,
});
gop_sample_start_index = samples.len();
}

let decode_timestamp = Time::new(sample.decode_timestamp as i64);
let presentation_timestamp = Time::new(sample.composition_timestamp as i64);
let duration = Time::new(sample.duration as i64);

let byte_offset = sample.offset as u32;
let byte_length = sample.size as u32;

samples.push(Sample {
is_sync: sample.is_sync,
decode_timestamp,
presentation_timestamp,
duration,
byte_offset,
byte_length,
});
}

if !samples.is_empty() {
Expand All @@ -79,10 +83,13 @@ impl VideoData {
});
}

let sample_statistics = SampleStatistics::new(&samples);

Ok(Self {
config,
timescale,
duration,
sample_statistics,
gops,
samples,
mp4_tracks,
Expand Down
69 changes: 45 additions & 24 deletions crates/viewer/re_data_ui/src/video.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use re_renderer::{
};
use re_types::components::VideoTimestamp;
use re_ui::{list_item::PropertyContent, UiExt};
use re_video::decode::FrameInfo;
use re_video::{decode::FrameInfo, demux::SampleStatistics};
use re_viewer_context::UiLayout;

pub fn show_video_blob_info(
Expand Down Expand Up @@ -65,11 +65,11 @@ pub fn show_video_blob_info(
.value_text(format!("{}", re_log_types::Duration::from(data.duration()))),
);
// Some people may think that num_frames / duration = fps, but that's not true, videos may have variable frame rate.
// At the same time, we don't want to overload users with video codec/container specific stuff that they have to understand,
// and for all intents and purposes one sample = one frame.
// So the compromise is that we truthfully show the number of *samples* here and don't talk about frames.
// Video containers and codecs like talking about samples or chunks rather than frames, but for how we define a chunk today,
// a frame is always a single chunk of data is always a single sample, see [`re_video::decode::Chunk`].
// So for all practical purposes the sample count _is_ the number of frames, at least how we use it today.
ui.list_item_flat_noninteractive(
PropertyContent::new("Sample count")
PropertyContent::new("Frame count")
.value_text(re_format::format_uint(data.num_samples())),
);
ui.list_item_flat_noninteractive(
Expand All @@ -91,6 +91,13 @@ pub fn show_video_blob_info(
);
}
});
ui.list_item_collapsible_noninteractive_label(
"Extended timing info",
false,
|ui| {
sample_statistics_ui(ui, &data.sample_statistics);
},
);
}

if let Some(render_ctx) = render_ctx {
Expand Down Expand Up @@ -144,12 +151,7 @@ pub fn show_video_blob_info(
egui::Spinner::new().paint_at(ui, smaller_rect);
}

decoded_frame_ui(
ui,
&frame_info,
video.data().timescale,
&source_pixel_format,
);
decoded_frame_ui(ui, &frame_info, video.data(), &source_pixel_format);
}

Err(err) => {
Expand Down Expand Up @@ -188,52 +190,71 @@ pub fn show_video_blob_info(
}
}

/// Shows the contents of a [`SampleStatistics`] as two non-interactive property rows,
/// each with a hover text explaining the value:
///
/// * "Minimum PTS": the raw value of `minimum_presentation_timestamp`.
/// * "PTS equivalent to DTS": the `dts_always_equal_pts` flag as text.
fn sample_statistics_ui(ui: &mut egui::Ui, sample_statistics: &SampleStatistics) {
    let minimum_pts_text = sample_statistics
        .minimum_presentation_timestamp
        .0
        .to_string();
    let minimum_pts_row = PropertyContent::new("Minimum PTS").value_text(minimum_pts_text);
    ui.list_item_flat_noninteractive(minimum_pts_row)
        .on_hover_text(
            "The smallest presentation timestamp (PTS) observed in this video.\n\
             A non-zero value indicates that there are B-frames in the video.\n\
             Rerun will place the 0:00:00 time at this timestamp.",
        );

    // `value_bool` doesn't look great for static values.
    let dts_equals_pts_text = sample_statistics.dts_always_equal_pts.to_string();
    let dts_equals_pts_row =
        PropertyContent::new("PTS equivalent to DTS").value_text(dts_equals_pts_text);
    ui.list_item_flat_noninteractive(dts_equals_pts_row)
        .on_hover_text("Whether all decode timestamps are equal to presentation timestamps. If true, the video typically has no B-frames.");
}

fn decoded_frame_ui(
ui: &mut egui::Ui,
frame_info: &FrameInfo,
timescale: re_video::Timescale,
video_data: &re_video::VideoData,
source_image_format: &SourceImageDataFormat,
) {
re_ui::list_item::list_item_scope(ui, "decoded_frame_ui", |ui| {
let default_open = false;
ui.list_item_collapsible_noninteractive_label("Decoded frame info", default_open, |ui| {
frame_info_ui(ui, frame_info, timescale);
frame_info_ui(ui, frame_info, video_data);
source_image_data_format_ui(ui, source_image_format);
});
});
}

fn frame_info_ui(ui: &mut egui::Ui, frame_info: &FrameInfo, timescale: re_video::Timescale) {
let time_range = frame_info.time_range();
fn frame_info_ui(ui: &mut egui::Ui, frame_info: &FrameInfo, video_data: &re_video::VideoData) {
let time_range = frame_info.presentation_time_range();
ui.list_item_flat_noninteractive(PropertyContent::new("Time range").value_text(format!(
"{} - {}",
re_format::format_timestamp_seconds(time_range.start.into_secs(timescale)),
re_format::format_timestamp_seconds(time_range.end.into_secs(timescale)),
)))
"{} - {}",
re_format::format_timestamp_seconds(
(time_range.start - video_data.sample_statistics.minimum_presentation_timestamp)
.into_secs(video_data.timescale)
),
re_format::format_timestamp_seconds(
(time_range.end - video_data.sample_statistics.minimum_presentation_timestamp)
.into_secs(video_data.timescale)
),
)))
.on_hover_text("Time range in which this frame is valid.");

fn value_fn_for_time(
time: re_video::Time,
timescale: re_video::Timescale,
) -> impl FnOnce(&mut egui::Ui, egui::style::WidgetVisuals) {
video_data: &re_video::VideoData,
) -> impl FnOnce(&mut egui::Ui, egui::style::WidgetVisuals) + '_ {
move |ui, _| {
ui.add(egui::Label::new(time.0.to_string()).truncate())
.on_hover_text(re_format::format_timestamp_seconds(
time.into_secs(timescale),
(time - video_data.sample_statistics.minimum_presentation_timestamp)
.into_secs(video_data.timescale),
));
}
}

if let Some(dts) = frame_info.latest_decode_timestamp {
ui.list_item_flat_noninteractive(
PropertyContent::new("DTS").value_fn(value_fn_for_time(dts, timescale)),
PropertyContent::new("DTS").value_fn(value_fn_for_time(dts, video_data)),
)
.on_hover_text("Raw decode timestamp prior to applying the timescale.\n\
If a frame is made up of multiple chunks, this is the last decode timestamp that was needed to decode the frame.");
}

ui.list_item_flat_noninteractive(
PropertyContent::new("PTS").value_fn(value_fn_for_time(frame_info.presentation_timestamp, timescale)),
PropertyContent::new("PTS").value_fn(value_fn_for_time(frame_info.presentation_timestamp, video_data)),
)
.on_hover_text("Raw presentation timestamp prior to applying the timescale.\n\
This specifies the time at which the frame should be shown relative to the start of a video stream.");
Expand Down
9 changes: 6 additions & 3 deletions crates/viewer/re_renderer/src/video/chunk_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ impl VideoChunkDecoder {
let decoder_output = decoder_output.clone();
move |frame: re_video::decode::Result<Frame>| match frame {
Ok(frame) => {
re_log::trace!("Decoded frame at {:?}", frame.info.presentation_timestamp);
re_log::trace!(
"Decoded frame at PTS {:?}",
frame.info.presentation_timestamp
);
let mut output = decoder_output.lock();
output.frames.push(frame);
output.error = None; // We successfully decoded a frame, reset the error state.
Expand Down Expand Up @@ -114,10 +117,10 @@ impl VideoChunkDecoder {
let frame_idx = 0;
let frame = &frames[frame_idx];

let frame_time_range = frame.info.time_range();
let frame_time_range = frame.info.presentation_time_range();

if frame_time_range.contains(&presentation_timestamp)
&& video_texture.frame_info.time_range() != frame_time_range
&& video_texture.frame_info.presentation_time_range() != frame_time_range
{
#[cfg(target_arch = "wasm32")]
{
Expand Down
Loading
Loading