Skip to content

Commit 9b3af30

Browse files
authored
Fix video backward seeking / stepping back sometimes getting stuck (in the presence of b-frames) (#8053)
### What * Fixes #7956 Our PTS -> sample search algorithm wasn't quite right. Fixed this here and added a unit test. Unfortunately, I had to introduce a small auxiliary data structure to keep this snappy / not O(n) for n samples, but it's very simple and rather contained. Tested on native & web. Web still suffers from * #7961 but on Chrome this works fine now! https://github.com/user-attachments/assets/7a20467d-4d66-456f-be99-debba4ff7a31 ### Checklist * [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md) * [x] I've included a screenshot or gif (if applicable) * [x] I have tested the web demo (if applicable): * Using examples from latest `main` build: [rerun.io/viewer](https://rerun.io/viewer/pr/8053?manifest_url=https://app.rerun.io/version/main/examples_manifest.json) * Using full set of examples from `nightly` build: [rerun.io/viewer](https://rerun.io/viewer/pr/8053?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json) * [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG * [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)! * [x] If have noted any breaking changes to the log API in `CHANGELOG.md` and the migration guide - [PR Build Summary](https://build.rerun.io/pr/8053) - [Recent benchmark results](https://build.rerun.io/graphs/crates.html) - [Wasm size tracking](https://build.rerun.io/graphs/sizes.html) To run all checks from `main`, comment on the PR with `@rerun-bot full-check`.
1 parent 6c15781 commit 9b3af30

File tree

6 files changed

+214
-23
lines changed

6 files changed

+214
-23
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -6477,6 +6477,7 @@ dependencies = [
64776477
name = "re_video"
64786478
version = "0.20.0-alpha.1+dev"
64796479
dependencies = [
6480+
"bit-vec",
64806481
"cfg_aliases 0.2.1",
64816482
"criterion",
64826483
"crossbeam",

crates/store/re_video/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ re_build_info.workspace = true
4747
re_log.workspace = true
4848
re_tracing.workspace = true
4949

50+
bit-vec.workspace = true
5051
crossbeam.workspace = true
5152
econtext.workspace = true
5253
itertools.workspace = true

crates/store/re_video/src/decode/ffmpeg_h264/ffmpeg.rs

-6
Original file line numberDiff line numberDiff line change
@@ -516,12 +516,6 @@ fn read_ffmpeg_output(
516516
// How do we know how large this buffer needs to be?
517517
// Whenever the highest known DTS is behind the PTS, we need to wait until the DTS catches up.
518518
// Otherwise, we'd assign the wrong PTS to the frame that just came in.
519-
//
520-
// Example how presentation timestamps and decode timestamps
521-
// can play out in the presence of B-frames to illustrate this:
522-
// PTS: 1 4 2 3
523-
// DTS: 1 2 3 4
524-
// Stream: I P B B
525519
let frame_info = loop {
526520
let oldest_pts_in_buffer =
527521
pending_frame_infos.first_key_value().map(|(pts, _)| *pts);

crates/store/re_video/src/demux/mod.rs

+185-16
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub mod mp4;
99

1010
use std::{collections::BTreeMap, ops::Range};
1111

12+
use bit_vec::BitVec;
1213
use itertools::Itertools as _;
1314

1415
use super::{Time, Timescale};
@@ -89,6 +90,11 @@ pub struct SamplesStatistics {
8990
///
9091
/// If true, the video typically has no B-frames as those require frame reordering.
9192
pub dts_always_equal_pts: bool,
93+
94+
/// If `dts_always_equal_pts` is false, then this gives for each sample whether its PTS is the highest seen so far.
95+
/// If `dts_always_equal_pts` is true, then this is left as `None`.
96+
/// This is used for optimizing PTS search.
97+
pub has_sample_highest_pts_so_far: Option<BitVec>,
9298
}
9399

94100
impl SamplesStatistics {
@@ -104,9 +110,25 @@ impl SamplesStatistics {
104110
.iter()
105111
.all(|s| s.decode_timestamp == s.presentation_timestamp);
106112

113+
let mut biggest_pts_so_far = Time::MIN;
114+
let has_sample_highest_pts_so_far = (!dts_always_equal_pts).then(|| {
115+
samples
116+
.iter()
117+
.map(move |sample| {
118+
if sample.presentation_timestamp > biggest_pts_so_far {
119+
biggest_pts_so_far = sample.presentation_timestamp;
120+
true
121+
} else {
122+
false
123+
}
124+
})
125+
.collect()
126+
});
127+
107128
Self {
108129
minimum_presentation_timestamp,
109130
dts_always_equal_pts,
131+
has_sample_highest_pts_so_far,
110132
}
111133
}
112134
}
@@ -296,34 +318,83 @@ impl VideoData {
296318
})
297319
}
298320

299-
/// For a given decode (!) timestamp, returns the index of the latest sample whose
321+
/// For a given decode (!) timestamp, returns the index of the first sample whose
300322
/// decode timestamp is lesser than or equal to the given timestamp.
301-
pub fn latest_sample_index_at_decode_timestamp(&self, decode_time: Time) -> Option<usize> {
302-
latest_at_idx(
303-
&self.samples,
304-
|sample| sample.decode_timestamp,
305-
&decode_time,
306-
)
323+
fn latest_sample_index_at_decode_timestamp(
324+
samples: &[Sample],
325+
decode_time: Time,
326+
) -> Option<usize> {
327+
latest_at_idx(samples, |sample| sample.decode_timestamp, &decode_time)
307328
}
308329

309-
/// For a given presentation timestamp, return the index of the latest sample
310-
/// whose presentation timestamp is lesser than or equal to the given timestamp.
311-
pub fn latest_sample_index_at_presentation_timestamp(
312-
&self,
330+
/// See [`Self::latest_sample_index_at_presentation_timestamp`], split out for testing purposes.
331+
fn latest_sample_index_at_presentation_timestamp_internal(
332+
samples: &[Sample],
333+
sample_statistics: &SamplesStatistics,
313334
presentation_timestamp: Time,
314335
) -> Option<usize> {
315336
// Find the latest sample where `decode_timestamp <= presentation_timestamp`.
316337
// Because `decode <= presentation`, we never have to look further backwards in the
317338
// video than this.
318339
let decode_sample_idx =
319-
self.latest_sample_index_at_decode_timestamp(presentation_timestamp)?;
340+
Self::latest_sample_index_at_decode_timestamp(samples, presentation_timestamp)?;
341+
342+
// It's very common that dts==pts in which case we're done!
343+
let Some(has_sample_highest_pts_so_far) =
344+
sample_statistics.has_sample_highest_pts_so_far.as_ref()
345+
else {
346+
debug_assert!(!sample_statistics.dts_always_equal_pts);
347+
return Some(decode_sample_idx);
348+
};
349+
debug_assert!(has_sample_highest_pts_so_far.len() == samples.len());
320350

321351
// Search backwards, starting at `decode_sample_idx`, looking for
322352
// the first sample where `sample.presentation_timestamp <= presentation_timestamp`.
323-
// this is the sample which when decoded will be presented at the timestamp the user requested.
324-
self.samples[..=decode_sample_idx]
325-
.iter()
326-
.rposition(|sample| sample.presentation_timestamp <= presentation_timestamp)
353+
// I.e. the sample with the biggest PTS that is smaller or equal to the requested PTS.
354+
//
355+
// The tricky part is that we can't just take the first sample with a presentation timestamp that matches
356+
// since smaller presentation timestamps may still show up further back!
357+
let mut best_index = usize::MAX;
358+
let mut best_pts = Time::MIN;
359+
for sample_idx in (0..=decode_sample_idx).rev() {
360+
let sample = &samples[sample_idx];
361+
362+
if sample.presentation_timestamp == presentation_timestamp {
363+
// Clean hit. Take this one, no questions asked :)
364+
// (assuming that each PTS is unique!)
365+
return Some(sample_idx);
366+
}
367+
368+
if sample.presentation_timestamp < presentation_timestamp
369+
&& sample.presentation_timestamp > best_pts
370+
{
371+
best_pts = sample.presentation_timestamp;
372+
best_index = sample_idx;
373+
}
374+
375+
if best_pts != Time::MIN && has_sample_highest_pts_so_far[sample_idx] {
376+
// We won't see any bigger PTS values anymore, meaning we're as close as we can get to the requested PTS!
377+
return Some(best_index);
378+
}
379+
}
380+
381+
None
382+
}
383+
384+
/// For a given presentation timestamp, return the index of the first sample
385+
/// whose presentation timestamp is lesser than or equal to the given timestamp.
386+
///
387+
/// Remember that samples after (i.e. with higher index) may have a *lower* presentation time
388+
/// if the stream has sample reordering!
389+
pub fn latest_sample_index_at_presentation_timestamp(
390+
&self,
391+
presentation_timestamp: Time,
392+
) -> Option<usize> {
393+
Self::latest_sample_index_at_presentation_timestamp_internal(
394+
&self.samples,
395+
&self.samples_statistics,
396+
presentation_timestamp,
397+
)
327398
}
328399

329400
/// For a given decode (!) timestamp, return the index of the group of pictures (GOP) index containing the given timestamp.
@@ -553,4 +624,102 @@ mod tests {
553624
assert_eq!(latest_at_idx(&v, |v| *v, &11), Some(9));
554625
assert_eq!(latest_at_idx(&v, |v| *v, &1000), Some(9));
555626
}
627+
628+
#[test]
629+
fn test_latest_sample_index_at_presentation_timestamp() {
630+
// This is a snippet of real world data!
631+
let pts = [
632+
512, 1536, 1024, 768, 1280, 2560, 2048, 1792, 2304, 3584, 3072, 2816, 3328, 4608, 4096,
633+
3840, 4352, 5376, 4864, 5120, 6400, 5888, 5632, 6144, 7424, 6912, 6656, 7168, 8448,
634+
7936, 7680, 8192, 9472, 8960, 8704, 9216, 10496, 9984, 9728, 10240, 11520, 11008,
635+
10752, 11264, 12544, 12032, 11776, 12288, 13568, 13056,
636+
];
637+
let dts = [
638+
0, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584,
639+
3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656, 6912, 7168,
640+
7424, 7680, 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984, 10240, 10496, 10752,
641+
11008, 11264, 11520, 11776, 12032, 12288, 12544,
642+
];
643+
644+
// Checking our basic assumptions about this data:
645+
assert_eq!(pts.len(), dts.len());
646+
assert!(pts.iter().zip(dts.iter()).all(|(pts, dts)| dts <= pts));
647+
648+
// Create fake samples from this.
649+
let samples = pts
650+
.into_iter()
651+
.zip(dts)
652+
.map(|(pts, dts)| Sample {
653+
is_sync: false,
654+
decode_timestamp: Time(dts),
655+
presentation_timestamp: Time(pts),
656+
duration: Time(1),
657+
byte_offset: 0,
658+
byte_length: 0,
659+
})
660+
.collect::<Vec<_>>();
661+
662+
let sample_statistics = SamplesStatistics::new(&samples);
663+
assert_eq!(sample_statistics.minimum_presentation_timestamp, Time(512));
664+
assert!(!sample_statistics.dts_always_equal_pts);
665+
666+
// Test queries on the samples.
667+
let query_pts = |pts| {
668+
VideoData::latest_sample_index_at_presentation_timestamp_internal(
669+
&samples,
670+
&sample_statistics,
671+
pts,
672+
)
673+
};
674+
675+
// Check that query for all exact positions works as expected using brute force search as the reference.
676+
for (idx, sample) in samples.iter().enumerate() {
677+
assert_eq!(Some(idx), query_pts(sample.presentation_timestamp));
678+
}
679+
680+
// Check that for slightly offsetted positions the query is still correct.
681+
// This works because for this dataset we know the minimum presentation timesetampe distance is always 256.
682+
for (idx, sample) in samples.iter().enumerate() {
683+
assert_eq!(
684+
Some(idx),
685+
query_pts(sample.presentation_timestamp + Time(1))
686+
);
687+
assert_eq!(
688+
Some(idx),
689+
query_pts(sample.presentation_timestamp + Time(255))
690+
);
691+
}
692+
693+
// A few hardcoded cases - both for illustrative purposes and to make sure the generic tests above are correct.
694+
695+
// Querying before the first sample.
696+
assert_eq!(None, query_pts(Time(0)));
697+
assert_eq!(None, query_pts(Time(123)));
698+
699+
// Querying for the first sample
700+
assert_eq!(Some(0), query_pts(Time(512)));
701+
assert_eq!(Some(0), query_pts(Time(513)));
702+
assert_eq!(Some(0), query_pts(Time(600)));
703+
assert_eq!(Some(0), query_pts(Time(767)));
704+
705+
// The next sample is a jump in index!
706+
assert_eq!(Some(3), query_pts(Time(768)));
707+
assert_eq!(Some(3), query_pts(Time(769)));
708+
assert_eq!(Some(3), query_pts(Time(800)));
709+
assert_eq!(Some(3), query_pts(Time(1023)));
710+
711+
// And the one after that should jump back again.
712+
assert_eq!(Some(2), query_pts(Time(1024)));
713+
assert_eq!(Some(2), query_pts(Time(1025)));
714+
assert_eq!(Some(2), query_pts(Time(1100)));
715+
assert_eq!(Some(2), query_pts(Time(1279)));
716+
717+
// And another one!
718+
assert_eq!(Some(4), query_pts(Time(1280)));
719+
assert_eq!(Some(4), query_pts(Time(1281)));
720+
721+
// Test way outside of the range.
722+
// (this is not the last element in the list since that one doesn't have the highest PTS)
723+
assert_eq!(Some(48), query_pts(Time(123123123123123123)));
724+
}
556725
}

crates/viewer/re_data_ui/src/video.rs

+18
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,24 @@ fn frame_info_ui(ui: &mut egui::Ui, frame_info: &FrameInfo, video_data: &re_vide
268268
.on_hover_text("Raw presentation timestamp prior to applying the timescale.\n\
269269
This specifies the time at which the frame should be shown relative to the start of a video stream.");
270270

271+
// Judging the following to be a bit too obscure to be of relevance outside of debugging Rerun itself.
272+
#[cfg(debug_assertions)]
273+
{
274+
if let Some(has_sample_highest_pts_so_far) = video_data
275+
.samples_statistics
276+
.has_sample_highest_pts_so_far
277+
.as_ref()
278+
{
279+
if let Some(sample_idx) = video_data
280+
.latest_sample_index_at_presentation_timestamp(frame_info.presentation_timestamp)
281+
{
282+
ui.list_item_flat_noninteractive(
283+
PropertyContent::new("Highest PTS so far").value_text(has_sample_highest_pts_so_far[sample_idx].to_string())
284+
).on_hover_text("Whether the presentation timestamp (PTS) at the this frame is the highest encountered so far. If false there are lower PTS values prior in the list.");
285+
}
286+
}
287+
}
288+
271289
// Information about the current group of pictures this frame is part of.
272290
// Lookup via decode timestamp is faster, but it may not always be available.
273291
if let Some(gop_index) =

crates/viewer/re_renderer/src/video/player.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,15 @@ impl VideoPlayer {
258258
// special case: handle seeking backwards within a single GOP
259259
// this is super inefficient, but it's the only way to handle it
260260
// while maintaining a buffer of only 2 GOPs
261-
if requested_sample_idx < self.current_sample_idx {
261+
//
262+
// Note that due to sample reordering (in the presence of b-frames), if can happen
263+
// that `self.current_sample_idx` is *behind* the `requested_sample_idx` even if we're
264+
// seeking backwards!
265+
// Therefore, it's important to compare presentation timestamps instead of sample indices.
266+
// (comparing decode timestamps should be equivalent to comparing sample indices)
267+
let current_pts = self.data.samples[self.current_sample_idx].presentation_timestamp;
268+
let requested_pts = self.data.samples[requested_sample_idx].presentation_timestamp;
269+
if requested_pts < current_pts {
262270
self.reset()?;
263271
self.enqueue_gop(requested_gop_idx, video_data)?;
264272
self.enqueue_gop(requested_gop_idx + 1, video_data)?;

0 commit comments

Comments
 (0)