Skip to content

Commit 28d8336

Browse files
authored
GC improvements 5: Store{Diff,Event} optimizations (#4399)
Optimize the creation of `StoreDiff`s and `StoreEvent`s, which turns out to be a major cost in time series use cases, when it is common to generate several millions of those on any single GC run. Once again some pretty significant wins. ### Benchmarks Compared to `main`: ``` group gc_improvements_0 gc_improvements_5 ----- ----------------- ----------------- .../plotting_dashboard/drop_at_least=0.3/bucketsz=1024 13.00 1084.0±4.47ms 54.1 KElem/sec 1.00 83.4±1.16ms 702.9 KElem/sec .../plotting_dashboard/drop_at_least=0.3/bucketsz=2048 25.37 2.1±0.02s 27.6 KElem/sec 1.00 83.7±0.61ms 700.0 KElem/sec .../plotting_dashboard/drop_at_least=0.3/bucketsz=256 5.55 465.8±2.50ms 125.8 KElem/sec 1.00 84.0±0.50ms 697.8 KElem/sec .../plotting_dashboard/drop_at_least=0.3/bucketsz=512 7.94 655.3±2.61ms 89.4 KElem/sec 1.00 82.5±1.33ms 710.0 KElem/sec .../plotting_dashboard/drop_at_least=0.3/default 8.02 652.8±4.12ms 89.8 KElem/sec 1.00 81.4±0.94ms 720.0 KElem/sec .../timeless_logs/drop_at_least=0.3/bucketsz=1024 35.87 2.4±0.05s 24.2 KElem/sec 1.00 67.5±2.21ms 867.5 KElem/sec .../timeless_logs/drop_at_least=0.3/bucketsz=2048 35.91 2.4±0.03s 24.1 KElem/sec 1.00 67.8±1.86ms 863.9 KElem/sec .../timeless_logs/drop_at_least=0.3/bucketsz=256 37.02 2.5±0.08s 23.5 KElem/sec 1.00 67.5±1.43ms 868.2 KElem/sec .../timeless_logs/drop_at_least=0.3/bucketsz=512 35.47 2.4±0.02s 24.5 KElem/sec 1.00 67.4±1.40ms 869.4 KElem/sec .../timeless_logs/drop_at_least=0.3/default 36.00 2.4±0.03s 24.4 KElem/sec 1.00 66.8±0.85ms 877.3 KElem/sec ``` Compared to previous PR: ``` group gc_improvements_4 gc_improvements_5 ----- ----------------- ----------------- .../plotting_dashboard/drop_at_least=0.3/bucketsz=1024 1.26 105.0±0.91ms 558.1 KElem/sec 1.00 83.4±1.16ms 702.9 KElem/sec .../plotting_dashboard/drop_at_least=0.3/bucketsz=2048 1.28 107.3±0.83ms 546.2 KElem/sec 1.00 83.7±0.61ms 700.0 KElem/sec .../plotting_dashboard/drop_at_least=0.3/bucketsz=256 1.27 106.3±0.74ms 551.3 KElem/sec 1.00 84.0±0.50ms 697.8 KElem/sec .../plotting_dashboard/drop_at_least=0.3/bucketsz=512 1.29 106.4±0.94ms 550.6 KElem/sec 1.00 82.5±1.33ms 710.0 KElem/sec .../plotting_dashboard/drop_at_least=0.3/default 1.26 102.9±0.75ms 569.4 KElem/sec 1.00 81.4±0.94ms 720.0 KElem/sec .../timeless_logs/drop_at_least=0.3/bucketsz=1024 1.00 65.3±0.81ms 897.6 KElem/sec 1.03 67.5±2.21ms 867.5 KElem/sec .../timeless_logs/drop_at_least=0.3/bucketsz=2048 1.00 64.9±1.07ms 903.2 KElem/sec 1.05 67.8±1.86ms 863.9 KElem/sec .../timeless_logs/drop_at_least=0.3/bucketsz=256 1.00 64.4±0.99ms 910.2 KElem/sec 1.05 67.5±1.43ms 868.2 KElem/sec .../timeless_logs/drop_at_least=0.3/bucketsz=512 1.00 64.6±1.08ms 906.9 KElem/sec 1.04 67.4±1.40ms 869.4 KElem/sec .../timeless_logs/drop_at_least=0.3/default 1.00 65.3±1.29ms 897.3 KElem/sec 1.02 66.8±0.85ms 877.3 KElem/sec ``` --- Part of the GC improvements series: - #4394 - #4395 - #4396 - #4397 - #4398 - #4399 - #4400 - #4401
1 parent 1ce4563 commit 28d8336

File tree

7 files changed

+50
-77
lines changed

7 files changed

+50
-77
lines changed

crates/re_arrow_store/src/store_event.rs

+20-46
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,17 @@ pub struct StoreDiff {
112112
/// one addition and (optionally) one deletion (in that order!).
113113
pub row_id: RowId,
114114

115-
/// The [`TimePoint`] associated with that row.
115+
/// The time data associated with that row.
116116
///
117117
/// Since insertions and deletions both work on a row-level basis, this is guaranteed to be the
118118
/// same value for both the insertion and deletion events (if any).
119-
pub timepoint: TimePoint,
119+
///
120+
/// This is not a [`TimePoint`] for performance reasons.
121+
//
122+
// NOTE: Empirical testing shows that a SmallVec isn't any better in the best case, and can be a
123+
// significant performant drop at worst.
124+
// pub times: SmallVec<[(Timeline, TimeInt); 5]>, // "5 timelines ought to be enough for anyone"
125+
pub times: Vec<(Timeline, TimeInt)>,
120126

121127
/// The [`EntityPath`] associated with that row.
122128
///
@@ -137,7 +143,7 @@ impl StoreDiff {
137143
Self {
138144
kind: StoreDiffKind::Addition,
139145
row_id: row_id.into(),
140-
timepoint: TimePoint::timeless(),
146+
times: Default::default(),
141147
entity_path: entity_path.into(),
142148
cells: Default::default(),
143149
}
@@ -148,75 +154,43 @@ impl StoreDiff {
148154
Self {
149155
kind: StoreDiffKind::Deletion,
150156
row_id: row_id.into(),
151-
timepoint: TimePoint::timeless(),
157+
times: Default::default(),
152158
entity_path: entity_path.into(),
153159
cells: Default::default(),
154160
}
155161
}
156162

157163
#[inline]
158-
pub fn at_timepoint(mut self, timepoint: impl Into<TimePoint>) -> StoreDiff {
159-
self.timepoint = self.timepoint.union_max(&timepoint.into());
164+
pub fn at_timepoint(&mut self, timepoint: impl Into<TimePoint>) -> &mut Self {
165+
self.times.extend(timepoint.into());
160166
self
161167
}
162168

163169
#[inline]
164170
pub fn at_timestamp(
165-
mut self,
171+
&mut self,
166172
timeline: impl Into<Timeline>,
167173
time: impl Into<TimeInt>,
168-
) -> StoreDiff {
169-
self.timepoint.insert(timeline.into(), time.into());
174+
) -> &mut Self {
175+
self.times.push((timeline.into(), time.into()));
170176
self
171177
}
172178

173179
#[inline]
174-
pub fn with_cells(mut self, cells: impl IntoIterator<Item = DataCell>) -> Self {
180+
pub fn with_cells(&mut self, cells: impl IntoIterator<Item = DataCell>) -> &mut Self {
175181
self.cells
176182
.extend(cells.into_iter().map(|cell| (cell.component_name(), cell)));
177183
self
178184
}
179185

180-
/// Returns the union of two [`StoreDiff`]s.
181-
///
182-
/// They must share the same [`RowId`], [`EntityPath`] and [`StoreDiffKind`].
183186
#[inline]
184-
pub fn union(&self, rhs: &Self) -> Option<Self> {
185-
let Self {
186-
kind: lhs_kind,
187-
row_id: lhs_row_id,
188-
timepoint: lhs_timepoint,
189-
entity_path: lhs_entity_path,
190-
cells: lhs_cells,
191-
} = self;
192-
let Self {
193-
kind: rhs_kind,
194-
row_id: rhs_row_id,
195-
timepoint: rhs_timepoint,
196-
entity_path: rhs_entity_path,
197-
cells: rhs_cells,
198-
} = rhs;
199-
200-
let same_kind = lhs_kind == rhs_kind;
201-
let same_row_id = lhs_row_id == rhs_row_id;
202-
let same_entity_path = lhs_entity_path == rhs_entity_path;
203-
204-
(same_kind && same_row_id && same_entity_path).then(|| Self {
205-
kind: *lhs_kind,
206-
row_id: *lhs_row_id,
207-
timepoint: lhs_timepoint.clone().union_max(rhs_timepoint),
208-
entity_path: lhs_entity_path.clone(),
209-
cells: [lhs_cells.values(), rhs_cells.values()]
210-
.into_iter()
211-
.flatten()
212-
.map(|cell| (cell.component_name(), cell.clone()))
213-
.collect(),
214-
})
187+
pub fn timepoint(&self) -> TimePoint {
188+
self.times.clone().into_iter().collect()
215189
}
216190

217191
#[inline]
218192
pub fn is_timeless(&self) -> bool {
219-
self.timepoint.is_timeless()
193+
self.times.is_empty()
220194
}
221195

222196
/// `-1` for deletions, `+1` for additions.
@@ -297,7 +271,7 @@ mod tests {
297271
if event.is_timeless() {
298272
self.timeless += delta;
299273
} else {
300-
for (&timeline, &time) in &event.timepoint {
274+
for &(timeline, time) in &event.times {
301275
*self.timelines.entry(timeline).or_default() += delta;
302276
*self.times.entry(time).or_default() += delta;
303277
}

crates/re_arrow_store/src/store_gc.rs

+9-10
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ impl DataStore {
240240
table.try_drop_row(&self.cluster_cell_cache, row_id, time.as_i64());
241241
if let Some(inner) = diff.as_mut() {
242242
if let Some(removed) = removed {
243-
diff = inner.union(&removed);
243+
inner.times.extend(removed.times);
244244
}
245245
} else {
246246
diff = removed;
@@ -258,7 +258,7 @@ impl DataStore {
258258
table.try_drop_row(&self.cluster_cell_cache, row_id);
259259
if let Some(inner) = diff.as_mut() {
260260
if let Some(removed) = removed {
261-
diff = inner.union(&removed);
261+
inner.times.extend(removed.times);
262262
}
263263
} else {
264264
diff = removed;
@@ -476,7 +476,7 @@ impl DataStore {
476476
.entry(row_id)
477477
.or_insert_with(|| StoreDiff::deletion(row_id, entity_path.clone()));
478478

479-
diff.timepoint.insert(bucket.timeline, time.into());
479+
diff.times.push((bucket.timeline, time.into()));
480480

481481
for column in &mut inner.columns.values_mut() {
482482
let cell = column[i].take();
@@ -657,10 +657,9 @@ impl IndexedBucketInner {
657657
if let Some(inner) = diff.as_mut() {
658658
inner.cells.insert(cell.component_name(), cell);
659659
} else {
660-
diff = StoreDiff::deletion(removed_row_id, ent_path.clone())
661-
.at_timestamp(timeline, time)
662-
.with_cells([cell])
663-
.into();
660+
let mut d = StoreDiff::deletion(removed_row_id, ent_path.clone());
661+
d.at_timestamp(timeline, time).with_cells([cell]);
662+
diff = Some(d);
664663
}
665664
}
666665
}
@@ -752,9 +751,9 @@ impl PersistentIndexedTable {
752751
if let Some(inner) = diff.as_mut() {
753752
inner.cells.insert(cell.component_name(), cell);
754753
} else {
755-
diff = StoreDiff::deletion(removed_row_id, ent_path.clone())
756-
.with_cells([cell])
757-
.into();
754+
let mut d = StoreDiff::deletion(removed_row_id, ent_path.clone());
755+
d.cells.insert(cell.component_name(), cell);
756+
diff = Some(d);
758757
}
759758
}
760759
}

crates/re_arrow_store/src/store_write.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,8 @@ impl DataStore {
189189
}
190190
}
191191

192-
let diff = StoreDiff::addition(*row_id, entity_path.clone())
193-
.at_timepoint(timepoint.clone())
192+
let mut diff = StoreDiff::addition(*row_id, entity_path.clone());
193+
diff.at_timepoint(timepoint.clone())
194194
.with_cells(cells.iter().cloned());
195195

196196
// TODO(#4220): should we fire for auto-generated data?

crates/re_data_store/src/entity_tree.rs

+11-11
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ impl CompactedStoreEvents {
105105
event.delta().unsigned_abs();
106106
}
107107
} else {
108-
for (&timeline, &time) in &event.timepoint {
108+
for &(timeline, time) in &event.times {
109109
let per_timeline = this.timeful.entry(event.entity_path.hash()).or_default();
110110
let per_component = per_timeline.entry(timeline).or_default();
111111
for component_name in event.cells.keys() {
@@ -205,7 +205,7 @@ impl EntityTree {
205205
let leaf = self.create_subtrees_recursively(
206206
event.diff.entity_path.as_slice(),
207207
0,
208-
&event.diff.timepoint,
208+
&event.diff.times,
209209
event.num_components() as _,
210210
);
211211

@@ -235,7 +235,7 @@ impl EntityTree {
235235
pending_clears = self.flat_clears.clone().into_iter().collect_vec();
236236
Default::default()
237237
});
238-
per_component.add(&store_diff.timepoint, 1);
238+
per_component.add(&store_diff.times, 1);
239239

240240
// Is the newly added component under the influence of previously logged `Clear`
241241
// component?
@@ -343,7 +343,7 @@ impl EntityTree {
343343
next,
344344
is_recursive,
345345
store_diff.row_id,
346-
store_diff.timepoint.clone(),
346+
store_diff.timepoint(),
347347
));
348348
stack.extend(next.children.values_mut().collect::<Vec<&mut Self>>());
349349
}
@@ -352,7 +352,7 @@ impl EntityTree {
352352
self,
353353
is_recursive,
354354
store_diff.row_id,
355-
store_diff.timepoint.clone(),
355+
store_diff.timepoint(),
356356
));
357357
}
358358

@@ -387,7 +387,7 @@ impl EntityTree {
387387
.entry(component_path.entity_path().clone())
388388
.or_default();
389389

390-
*timepoint = store_diff.timepoint.clone().union_max(timepoint);
390+
*timepoint = store_diff.timepoint().union_max(timepoint);
391391
component_paths.insert(component_path.clone());
392392
}
393393
}
@@ -433,7 +433,7 @@ impl EntityTree {
433433
for event in filtered_events.iter().filter(|e| &e.entity_path == path) {
434434
for component_name in event.cells.keys() {
435435
if let Some(histo) = self.time_histograms_per_component.get_mut(component_name) {
436-
histo.remove(&event.timepoint, 1);
436+
histo.remove(&event.timepoint(), 1);
437437
if histo.is_empty() {
438438
self.time_histograms_per_component.remove(component_name);
439439
}
@@ -442,7 +442,7 @@ impl EntityTree {
442442
}
443443

444444
for event in &filtered_events {
445-
recursive_time_histogram.remove(&event.timepoint, event.num_components() as _);
445+
recursive_time_histogram.remove(&event.timepoint(), event.num_components() as _);
446446
}
447447

448448
children.retain(|_, child| {
@@ -458,10 +458,10 @@ impl EntityTree {
458458
&mut self,
459459
full_path: &[EntityPathPart],
460460
depth: usize,
461-
timepoint: &TimePoint,
461+
times: &[(Timeline, TimeInt)],
462462
num_components: u32,
463463
) -> &mut Self {
464-
self.recursive_time_histogram.add(timepoint, num_components);
464+
self.recursive_time_histogram.add(times, num_components);
465465

466466
match full_path.get(depth) {
467467
None => {
@@ -473,7 +473,7 @@ impl EntityTree {
473473
.or_insert_with(|| {
474474
EntityTree::new(full_path[..depth + 1].into(), self.recursive_clears.clone())
475475
})
476-
.create_subtrees_recursively(full_path, depth + 1, timepoint, num_components),
476+
.create_subtrees_recursively(full_path, depth + 1, times, num_components),
477477
}
478478
}
479479

crates/re_data_store/src/time_histogram_per_timeline.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::collections::BTreeMap;
22

33
use re_arrow_store::{StoreEvent, StoreSubscriber};
4-
use re_log_types::{TimePoint, Timeline};
4+
use re_log_types::{TimeInt, TimePoint, Timeline};
55

66
// ---
77

@@ -51,8 +51,8 @@ impl TimeHistogramPerTimeline {
5151
self.num_timeless_messages
5252
}
5353

54-
pub fn add(&mut self, timepoint: &TimePoint, n: u32) {
55-
if timepoint.is_timeless() {
54+
pub fn add(&mut self, times: &[(Timeline, TimeInt)], n: u32) {
55+
if times.is_empty() {
5656
self.num_timeless_messages = self
5757
.num_timeless_messages
5858
.checked_add(n as u64)
@@ -61,11 +61,11 @@ impl TimeHistogramPerTimeline {
6161
u64::MAX
6262
});
6363
} else {
64-
for (timeline, time_value) in timepoint.iter() {
64+
for &(timeline, time) in times {
6565
self.times
66-
.entry(*timeline)
66+
.entry(timeline)
6767
.or_default()
68-
.increment(time_value.as_i64(), n);
68+
.increment(time.as_i64(), n);
6969
}
7070
}
7171
}

crates/re_data_store/src/times_per_timeline.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ impl StoreSubscriber for TimesPerTimeline {
5454
re_tracing::profile_function!(format!("num_events={}", events.len()));
5555

5656
for event in events {
57-
for (&timeline, &time) in &event.timepoint {
57+
for &(timeline, time) in &event.times {
5858
let per_time = self.0.entry(timeline).or_default();
5959
let count = per_time.entry(time).or_default();
6060

examples/rust/custom_store_subscriber/src/main.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ impl StoreSubscriber for TimeRangesPerEntity {
161161

162162
fn on_events(&mut self, events: &[StoreEvent]) {
163163
for event in events {
164-
for (&timeline, &time) in &event.timepoint {
164+
for &(timeline, time) in &event.times {
165165
// update counters
166166
let per_timeline = self.times.entry(event.entity_path.clone()).or_default();
167167
let per_time = per_timeline.entry(timeline).or_default();

0 commit comments

Comments
 (0)