Skip to content

Commit 96aa23d

Browse files
authored
docs(common): add more docs for DataChunk (risingwavelabs#8736)
1 parent f6ccfd5 commit 96aa23d

File tree

4 files changed

+49
-7
lines changed

4 files changed

+49
-7
lines changed

src/common/src/array/column.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,18 @@ use risingwave_pb::data::PbColumn;
2020
use super::{Array, ArrayError, ArrayResult, I64Array};
2121
use crate::array::{ArrayImpl, ArrayRef};
2222

23-
/// Column is owned by `DataChunk`. It consists of logic data type and physical array
24-
/// implementation.
23+
/// A [`Column`] consists of its logical data type
24+
/// and its corresponding physical array implementation.
25+
/// The array contains all the datums bound to this [`Column`].
26+
/// [`Column`] is owned by [`DataChunk`].
27+
///
28+
/// For instance, in this [`DataChunk`],
29+
/// for column `v1`, [`ArrayRef`] will contain: `[1, 1, 1]`
30+
/// | v1 | v2 |
31+
/// |----|----|
32+
/// | 1 | a |
33+
/// | 1 | b |
34+
/// | 1 | c |
2535
#[derive(Clone, Debug, PartialEq)]
2636
pub struct Column {
2737
array: ArrayRef,

src/common/src/array/data_chunk.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,23 @@ use crate::util::hash_util::finalize_hashers;
3434
use crate::util::iter_util::{ZipEqDebug, ZipEqFast};
3535
use crate::util::value_encoding::{serialize_datum_into, ValueRowSerializer};
3636

37-
/// `DataChunk` is a collection of arrays with visibility mask.
37+
/// [`DataChunk`] is a collection of Columns,
38+
/// with a visibility mask for each row.
39+
/// For instance, we could have a [`DataChunk`] of this format.
40+
/// | v1 | v2 | v3 |
41+
/// |----|----|----|
42+
/// | 1 | a | t |
43+
/// | 2 | b | f |
44+
/// | 3 | c | t |
45+
/// | 4 | d | f |
46+
///
47+
/// Our columns are v1, v2, v3.
48+
/// Then, if the Visibility Mask hides rows 2 and 4,
49+
/// we will only have these rows visible:
50+
/// | v1 | v2 | v3 |
51+
/// |----|----|----|
52+
/// | 1 | a | t |
53+
/// | 3 | c | t |
3854
#[derive(Clone, PartialEq)]
3955
#[must_use]
4056
pub struct DataChunk {
@@ -170,7 +186,18 @@ impl DataChunk {
170186
}
171187

172188
/// `compact` will convert the chunk to compact format.
173-
/// Compact format means that `visibility == None`.
189+
/// Compacting removes the hidden rows, and returns a new visibility
190+
/// mask which indicates this.
191+
///
192+
/// `compact` has trade-offs:
193+
///
194+
/// Cost:
195+
/// It has to rebuild each column, meaning it will incur the cost
196+
/// of copying over bytes from the original column array to the new one.
197+
///
198+
/// Benefit:
199+
/// The main benefit is that the data chunk is smaller, taking up less memory.
200+
/// We can also save the cost of iterating over many hidden rows.
174201
pub fn compact(self) -> Self {
175202
match &self.vis2 {
176203
Vis::Compact(_) => self,

src/common/src/array/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,8 @@ macro_rules! impl_array_builder {
496496
}
497497
}
498498

499-
/// Append a [`Datum`] or [`DatumRef`] multiple times, return error while type not match.
499+
/// Append a [`Datum`] or [`DatumRef`] multiple times,
500+
/// panicking if the datum's type does not match the array builder's type.
500501
pub fn append_datum_n(&mut self, n: usize, datum: impl ToDatumRef) {
501502
match datum.to_datum_ref() {
502503
None => match self {

src/common/src/array/vis.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,15 @@ use itertools::repeat_n;
1717

1818
use crate::buffer::{Bitmap, BitmapBuilder};
1919

20-
/// `Vis` is a visibility bitmap of rows. When all rows are visible, it is considered compact and
21-
/// is represented by a single cardinality number rather than that many of ones.
20+
/// `Vis` is a visibility bitmap of rows.
2221
#[derive(Clone, PartialEq, Debug)]
2322
pub enum Vis {
23+
/// Non-compact variant.
24+
/// Certain rows are hidden using this bitmap.
2425
Bitmap(Bitmap),
26+
27+
/// Compact variant which just stores cardinality of rows.
28+
/// This can be used when all rows are visible.
2529
Compact(usize), // equivalent to all ones of this size
2630
}
2731

0 commit comments

Comments
 (0)