Skip to content

Commit 10992ad

Browse files
committed
wip
1 parent 7f30e5a commit 10992ad

File tree

101 files changed

+2801
-445
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+2801
-445
lines changed

INVESTIGATION.md

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
I think I know what's going on. I was looking in the wrong place: this is unrelated to https://github.com/rerun-io/rerun/pull/7182.
2+
3+
The problem stems from a nasty interaction between static data, latest-at query-time caching, and garbage collection.
4+
5+
Look at the following store stats after the recording has just loaded:
6+
7+
Without `--memory-limit`:
8+
![image](https://github.com/user-attachments/assets/65286d1f-e6b6-4695-89c7-cb3822bdf82c)
9+
10+
With `--memory-limit 300MiB`:
11+
![image](https://github.com/user-attachments/assets/8d679491-2468-4a70-897e-a33df14040a2)
12+
13+
By the time the GC enters into livelock, the stats will look like this:
14+
![image](https://github.com/user-attachments/assets/cc04a60d-efc4-450d-a6f6-5c135e4e73b5)
15+
16+
What's happening here is that all the temporal data that could be legally removed, has already been legally removed.
17+
Those 38 temporal chunks left that we see in the stats can never go away, since the GC runs with `protect_latest=1`: we're already at the point where all that's left is one temporal chunk per `(entity, timeline, component)`.
18+
The livelock has started: we can run the GC all we want, from this point onwards it will never be able to remove anything.
19+
20+
But if we only have ~35MiB of data in the store, why are we hitting the 300MiB limit and running the GC in the first place?
21+
22+
The problem is that the latest-at cache is organized by _query time_, as opposed to _data time_.
23+
That is why this cache grows a lot when you scrub the time cursor around: even though we are just caching references to chunks, all these btrees still take a lot of space on their own with this many query timestamps to keep track of.
24+
The reason we do this is performance: the viewer runs a number of latest-at queries per frame that grows linearly with the number of entities in the recording, every microsecond we can shave off on the latest-at path matters, it all compounds very quickly by the end of the frame.
25+
26+
Now here's where this gets ugly: if there's nothing to GC, then there are no `ChunkStoreEvent`s being sent, and thus there is no cache invalidation going on.
27+
The cache will therefore grow indefinitely, while the GC desperately attempts to keep things under control.

TODO

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/home/cmc/dev/rerun-io/rerun/crates/store/re_chunk_store/src/writes.rs:40: TODO:s should be written as `TODO(yourname): what to do`

antoine.diff

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
Subject: [PATCH] lint again
2+
---
3+
Index: rerun_py/rerun_bindings/rerun_bindings.pyi
4+
IDEA additional info:
5+
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
6+
<+>UTF-8
7+
===================================================================
8+
diff --git a/rerun_py/rerun_bindings/rerun_bindings.pyi b/rerun_py/rerun_bindings/rerun_bindings.pyi
9+
--- a/rerun_py/rerun_bindings/rerun_bindings.pyi (revision 15ee2b91f40be3750ceb896dd5a6b77ce2c3fcdc)
10+
+++ b/rerun_py/rerun_bindings/rerun_bindings.pyi (date 1728313461830)
11+
@@ -4,42 +4,41 @@
12+
13+
from .types import AnyColumn, ComponentLike, ViewContentsLike
14+
15+
-class ControlColumnDescriptor:
16+
- """A control-level column such as `RowId`."""
17+
-
18+
-class ControlColumnSelector:
19+
- """A selector for a control column."""
20+
-
21+
- @staticmethod
22+
- def row_id() -> ControlColumnSelector: ...
23+
24+
class IndexColumnDescriptor:
25+
"""A column containing the index values for when the component data was updated."""
26+
27+
+
28+
class IndexColumnSelector:
29+
"""A selector for an index column."""
30+
31+
def __init__(self, timeline: str): ...
32+
33+
+
34+
class ComponentColumnDescriptor:
35+
"""A column containing the component data."""
36+
37+
def with_dictionary_encoding(self) -> ComponentColumnDescriptor: ...
38+
39+
+
40+
class ComponentColumnSelector:
41+
"""A selector for a component column."""
42+
43+
def __new__(cls, entity_path: str, component_type: ComponentLike): ...
44+
+
45+
def with_dictionary_encoding(self) -> ComponentColumnSelector: ...
46+
47+
+
48+
class Schema:
49+
"""The schema representing all columns in a [`Recording`][]."""
50+
51+
- def control_columns(self) -> list[ControlColumnDescriptor]: ...
52+
def index_columns(self) -> list[IndexColumnDescriptor]: ...
53+
+
54+
def component_columns(self) -> list[ComponentColumnDescriptor]: ...
55+
+
56+
def column_for(self, entity_path: str, component: ComponentLike) -> Optional[ComponentColumnDescriptor]: ...
57+
58+
+
59+
class RecordingView:
60+
"""
61+
A view of a recording restricted to a given index, containing a specific set of entities and components.
62+
@@ -68,18 +67,23 @@
63+
64+
def select(self, *args: AnyColumn, columns: Optional[Sequence[AnyColumn]] = None) -> pa.RecordBatchReader: ...
65+
66+
+
67+
class Recording:
68+
"""A single recording."""
69+
70+
def schema(self) -> Schema: ...
71+
+
72+
def view(self, index: str, contents: ViewContentsLike) -> RecordingView: ...
73+
74+
+
75+
class RRDArchive:
76+
"""An archive loaded from an RRD, typically containing 1 or more recordings or blueprints."""
77+
78+
def num_recordings(self) -> int: ...
79+
+
80+
def all_recordings(self) -> list[Recording]: ...
81+
82+
+
83+
def load_recording(filename: str) -> Recording:
84+
"""
85+
Load a single recording from an RRD.
86+
@@ -94,6 +98,7 @@
87+
"""
88+
...
89+
90+
+
91+
def load_archive(filename: str) -> RRDArchive:
92+
"""
93+
Load a rerun archive file from disk.
94+
Index: rerun_py/rerun_sdk/rerun/dataframe.py
95+
IDEA additional info:
96+
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
97+
<+>UTF-8
98+
===================================================================
99+
diff --git a/rerun_py/rerun_sdk/rerun/dataframe.py b/rerun_py/rerun_sdk/rerun/dataframe.py
100+
--- a/rerun_py/rerun_sdk/rerun/dataframe.py (revision 15ee2b91f40be3750ceb896dd5a6b77ce2c3fcdc)
101+
+++ b/rerun_py/rerun_sdk/rerun/dataframe.py (date 1728313401654)
102+
@@ -3,8 +3,6 @@
103+
from rerun_bindings import (
104+
ComponentColumnDescriptor as ComponentColumnDescriptor,
105+
ComponentColumnSelector as ComponentColumnSelector,
106+
- ControlColumnDescriptor as ControlColumnDescriptor,
107+
- ControlColumnSelector as ControlColumnSelector,
108+
Recording as Recording,
109+
RRDArchive as RRDArchive,
110+
Schema as Schema,
111+
Index: rerun_py/rerun_bindings/types.py
112+
IDEA additional info:
113+
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
114+
<+>UTF-8
115+
===================================================================
116+
diff --git a/rerun_py/rerun_bindings/types.py b/rerun_py/rerun_bindings/types.py
117+
--- a/rerun_py/rerun_bindings/types.py (revision 15ee2b91f40be3750ceb896dd5a6b77ce2c3fcdc)
118+
+++ b/rerun_py/rerun_bindings/types.py (date 1728313429694)
119+
@@ -8,20 +8,15 @@
120+
from .rerun_bindings import (
121+
ComponentColumnDescriptor as ComponentColumnDescriptor,
122+
ComponentColumnSelector as ComponentColumnSelector,
123+
- ControlColumnDescriptor as ControlColumnDescriptor,
124+
- ControlColumnSelector as ControlColumnSelector,
125+
TimeColumnDescriptor as TimeColumnDescriptor,
126+
TimeColumnSelector as TimeColumnSelector,
127+
)
128+
129+
-
130+
ComponentLike: TypeAlias = Union[str, type["ComponentMixin"]]
131+
132+
AnyColumn: TypeAlias = Union[
133+
- "ControlColumnDescriptor",
134+
"TimeColumnDescriptor",
135+
"ComponentColumnDescriptor",
136+
- "ControlColumnSelector",
137+
"TimeColumnSelector",
138+
"ComponentColumnSelector",
139+
]

av1.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Getting SIMD-accelerated `av1` CPU decoding to work on Linux x64

bug.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from __future__ import annotations
2+
3+
import rerun as rr
4+
5+
rr.init("rerun_example_test_recording")
6+
rr.save("/tmp/bug.rrd")
7+
8+
rr.set_time_sequence("my_index", 1)
9+
rr.log("points", rr.Points3D([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))
10+
rr.set_time_sequence("my_index", 7)
11+
rr.log("points", rr.Points3D([[10, 11, 12]], colors=[[255, 0, 0]]))

buncha_transforms.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from __future__ import annotations
2+
3+
import rerun as rr
4+
5+
rr.init("rerun_example_transform3d")
6+
rr.save("/tmp/buncha_transforms_withclear_017.rrd")
7+
8+
rr.set_time_seconds("timestamp", 42)
9+
rr.log("transforms", rr.Clear(recursive=True))
10+
for i in range(0, 250):
11+
rr.log(f"transforms/{i}", rr.Transform3D(translation=[i, 0, 0]))

crates/build/re_types_builder/src/codegen/cpp/mod.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -1586,6 +1586,8 @@ fn archetype_serialize(type_ident: &Ident, obj: &Object, hpp_includes: &mut Incl
15861586
quote!(archetypes)
15871587
};
15881588

1589+
let archetype_name = &obj.fqname;
1590+
15891591
let num_fields = quote_integer(obj.fields.len() + 1); // Plus one for the indicator.
15901592
let push_batches = obj.fields.iter().map(|field| {
15911593
let field_name = field_name_identifier(field);
@@ -1596,18 +1598,20 @@ fn archetype_serialize(type_ident: &Ident, obj: &Object, hpp_includes: &mut Incl
15961598
cells.push_back(std::move(result.value));
15971599
};
15981600

1601+
let archetype_field_name = field.snake_case_name();
1602+
15991603
// TODO(andreas): Introducing MonoCollection will remove the need for distinguishing these two cases.
16001604
if field.is_nullable && !obj.attrs.has(ATTR_RERUN_LOG_MISSING_AS_EMPTY) {
16011605
quote! {
16021606
if (#field_accessor.has_value()) {
1603-
auto result = ComponentBatch::from_loggable(#field_accessor.value());
1607+
auto result = ComponentBatch::from_loggable(#field_accessor.value(), #archetype_name, #archetype_field_name);
16041608
#push_back
16051609
}
16061610
}
16071611
} else {
16081612
quote! {
16091613
{
1610-
auto result = ComponentBatch::from_loggable(#field_accessor);
1614+
auto result = ComponentBatch::from_loggable(#field_accessor, #archetype_name, #archetype_field_name);
16111615
#push_back
16121616
}
16131617
}
@@ -1632,7 +1636,7 @@ fn archetype_serialize(type_ident: &Ident, obj: &Object, hpp_includes: &mut Incl
16321636
#(#push_batches)*
16331637
{
16341638
auto indicator = #type_ident::IndicatorComponent();
1635-
auto result = ComponentBatch::from_loggable(indicator);
1639+
auto result = ComponentBatch::from_loggable(indicator, #archetype_name);
16361640
RR_RETURN_NOT_OK(result.error);
16371641
cells.emplace_back(std::move(result.value));
16381642
}

crates/top/rerun/src/commands/rrd/print.rs

+27-15
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use anyhow::Context;
2-
use itertools::Itertools as _;
2+
use itertools::Itertools;
33

44
use re_log_types::{LogMsg, SetStoreInfo};
55
use re_sdk::log::Chunk;
@@ -15,8 +15,10 @@ pub struct PrintCommand {
1515
path_to_input_rrds: Vec<String>,
1616

1717
/// If set, print out table contents.
18-
#[clap(long, short, default_value_t = false)]
19-
verbose: bool,
18+
///
19+
/// This can be specified more than once to toggle more and more verbose levels (e.g. -vvv).
20+
#[clap(long, short, action = clap::ArgAction::Count)]
21+
verbose: u8,
2022

2123
/// If set, will try to proceed even in the face of IO and/or decoding errors in the input data.
2224
#[clap(long = "continue-on-error", default_value_t = true)]
@@ -63,7 +65,7 @@ impl PrintCommand {
6365
}
6466
}
6567

66-
fn print_msg(verbose: bool, msg: LogMsg) -> anyhow::Result<()> {
68+
fn print_msg(verbose: u8, msg: LogMsg) -> anyhow::Result<()> {
6769
match msg {
6870
LogMsg::SetStoreInfo(msg) => {
6971
let SetStoreInfo { row_id: _, info } = msg;
@@ -73,21 +75,31 @@ fn print_msg(verbose: bool, msg: LogMsg) -> anyhow::Result<()> {
7375
LogMsg::ArrowMsg(_row_id, arrow_msg) => {
7476
let chunk = Chunk::from_arrow_msg(&arrow_msg).context("skipped corrupt chunk")?;
7577

76-
if verbose {
77-
println!("{chunk}");
78-
} else {
78+
print!(
79+
"Chunk({}) with {} rows ({}) - {:?} - ",
80+
chunk.id(),
81+
chunk.num_rows(),
82+
re_format::format_bytes(chunk.total_size_bytes() as _),
83+
chunk.entity_path(),
84+
);
85+
86+
if verbose == 0 {
7987
let column_names = chunk
8088
.component_names()
8189
.map(|name| name.short_name())
8290
.join(" ");
83-
84-
println!(
85-
"Chunk({}) with {} rows ({}) - {:?} - columns: [{column_names}]",
86-
chunk.id(),
87-
chunk.num_rows(),
88-
re_format::format_bytes(chunk.total_size_bytes() as _),
89-
chunk.entity_path(),
90-
);
91+
println!("columns: [{column_names}]");
92+
} else if verbose == 1 {
93+
let column_descriptors = chunk
94+
.component_descriptors()
95+
.map(|descr| descr.short_name())
96+
.collect_vec()
97+
.join(" ");
98+
println!("columns: [{column_descriptors}]",);
99+
} else if verbose == 2 {
100+
println!("\n{}", chunk.emptied()); // headers only
101+
} else {
102+
println!("\n{chunk}");
91103
}
92104
}
93105

crates/top/rerun_c/src/arrow_utils.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::{CError, CErrorCode};
22

3-
/// Converts a C-FFI arrow array into a Rust component batch, taking ownership of the underlying arrow data. ///
3+
/// Converts a C-FFI arrow array into a Rust component batch, taking ownership of the underlying arrow data.
44
///
55
/// Safety:
66
/// This must only be ever called once for a given ffi array.

crates/top/rerun_c/src/component_type_registry.rs

+9-6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
use once_cell::sync::Lazy;
22
use parking_lot::RwLock;
3-
use re_sdk::ComponentName;
3+
use re_sdk::ComponentDescriptor;
44

55
use crate::{CComponentTypeHandle, CError, CErrorCode};
66

77
pub struct ComponentType {
8-
pub name: ComponentName,
8+
pub descriptor: ComponentDescriptor,
99
pub datatype: arrow2::datatypes::DataType,
1010
}
1111

@@ -18,22 +18,25 @@ pub struct ComponentTypeRegistry {
1818
impl ComponentTypeRegistry {
1919
pub fn register(
2020
&mut self,
21-
name: ComponentName,
21+
descriptor: ComponentDescriptor,
2222
datatype: arrow2::datatypes::DataType,
2323
) -> CComponentTypeHandle {
2424
#[cfg(debug_assertions)]
2525
{
2626
for ty in &self.types {
2727
assert_ne!(
28-
ty.name, name,
29-
"Component type with the same name already registered"
28+
ty.descriptor, descriptor,
29+
"Component type with the same descriptor already registered"
3030
);
3131
}
3232
}
3333

3434
let id = self.next_id;
3535
self.next_id += 1;
36-
self.types.push(ComponentType { name, datatype });
36+
self.types.push(ComponentType {
37+
descriptor,
38+
datatype,
39+
});
3740
id
3841
}
3942

0 commit comments

Comments
 (0)