Skip to content

Commit 64a5f88

Browse files
authored
feat(storage): do not compress table_id (risingwavelabs#8512)
1 parent 7641b15 commit 64a5f88

File tree

4 files changed

+77
-27
lines changed

4 files changed

+77
-27
lines changed

src/storage/hummock_sdk/src/key.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,10 @@ impl<T: AsRef<[u8]>> UserKey<T> {
438438
buf.put_slice(self.table_key.as_ref());
439439
}
440440

441+
pub fn encode_table_key_into(&self, buf: &mut impl BufMut) {
442+
buf.put_slice(self.table_key.as_ref());
443+
}
444+
441445
/// Encode into a buffer.
442446
pub fn encode_length_prefixed(&self, buf: &mut impl BufMut) {
443447
buf.put_u32(self.table_id.table_id());
@@ -583,6 +587,12 @@ impl<T: AsRef<[u8]>> FullKey<T> {
583587
buf
584588
}
585589

590+
/// Encode the table key and epoch into a buffer, omitting the table id.
591+
pub fn encode_into_without_table_id(&self, buf: &mut impl BufMut) {
592+
self.user_key.encode_table_key_into(buf);
593+
buf.put_u64(self.epoch);
594+
}
595+
586596
pub fn encode_reverse_epoch(&self) -> Vec<u8> {
587597
let mut buf = Vec::with_capacity(
588598
TABLE_PREFIX_LEN + self.user_key.table_key.as_ref().len() + EPOCH_LEN,
@@ -614,6 +624,20 @@ impl<'a> FullKey<&'a [u8]> {
614624
}
615625
}
616626

627+
/// Construct a [`FullKey`] from a byte slice without `table_id` encoded.
628+
pub fn from_slice_without_table_id(
629+
table_id: TableId,
630+
slice_without_table_id: &'a [u8],
631+
) -> Self {
632+
let epoch_pos = slice_without_table_id.len() - EPOCH_LEN;
633+
let epoch = (&slice_without_table_id[epoch_pos..]).get_u64();
634+
635+
Self {
636+
user_key: UserKey::new(table_id, TableKey(&slice_without_table_id[..epoch_pos])),
637+
epoch,
638+
}
639+
}
640+
617641
/// Construct a [`FullKey`] from a byte slice.
618642
pub fn decode_reverse_epoch(slice: &'a [u8]) -> Self {
619643
let epoch_pos = slice.len() - EPOCH_LEN;

src/storage/src/hummock/compactor/iterator.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ impl SstableStreamIterator {
9191

9292
if let (Some(block_iter), Some(seek_key)) = (self.block_iter.as_mut(), seek_key) {
9393
block_iter.seek(seek_key);
94+
9495
if !block_iter.is_valid() {
9596
// `seek_key` is larger than everything in the first block.
9697
self.next_block().await?;

src/storage/src/hummock/sstable/block.rs

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use std::mem::size_of;
1818
use std::ops::Range;
1919

2020
use bytes::{Buf, BufMut, Bytes, BytesMut};
21+
use risingwave_common::catalog::TableId;
2122
use risingwave_hummock_sdk::key::FullKey;
2223
use risingwave_hummock_sdk::KeyComparator;
2324
use {lz4, zstd};
@@ -142,13 +143,18 @@ pub struct Block {
142143
pub data: Bytes,
143144
/// Uncompressed entries data length.
144145
data_len: usize,
146+
147+
/// Table id of this block.
148+
table_id: TableId,
149+
145150
/// Restart points.
146151
restart_points: Vec<RestartPoint>,
147152
}
148153

149154
impl Block {
150155
pub fn decode(buf: Bytes, uncompressed_capacity: usize) -> HummockResult<Self> {
151156
// Verify checksum.
157+
152158
let xxhash64_checksum = (&buf[buf.len() - 8..]).get_u64_le();
153159
xxhash64_verify(&buf[..buf.len() - 8], xxhash64_checksum)?;
154160

@@ -184,11 +190,12 @@ impl Block {
184190
}
185191

186192
pub fn decode_from_raw(buf: Bytes) -> Self {
193+
let table_id = (&buf[buf.len() - 4..]).get_u32_le();
187194
// decode restart_points_type_index
188-
let n_index = ((&buf[buf.len() - 4..]).get_u32_le()) as usize;
195+
let n_index = ((&buf[buf.len() - 8..buf.len() - 4]).get_u32_le()) as usize;
189196
let index_data_len = size_of::<u32>() + n_index * RestartPoint::size_of();
190-
let data_len = buf.len() - index_data_len;
191-
let mut restart_points_type_index_buf = &buf[data_len..buf.len() - 4];
197+
let data_len = buf.len() - 4 - index_data_len;
198+
let mut restart_points_type_index_buf = &buf[data_len..buf.len() - 8];
192199

193200
let mut index_key_vec = Vec::with_capacity(n_index);
194201
for _ in 0..n_index {
@@ -213,6 +220,7 @@ impl Block {
213220
let mut restart_points_buf = &buf[data_len..restarts_end];
214221

215222
let mut type_index: usize = 0;
223+
216224
for _ in 0..n_restarts {
217225
let offset = restart_points_buf.get_u32_le();
218226
if type_index < index_key_vec.len() - 1
@@ -232,6 +240,7 @@ impl Block {
232240
data: buf,
233241
data_len,
234242
restart_points,
243+
table_id: TableId::new(table_id),
235244
}
236245
}
237246

@@ -243,7 +252,13 @@ impl Block {
243252
}
244253

245254
pub fn capacity(&self) -> usize {
246-
self.data.len() + self.restart_points.capacity() * std::mem::size_of::<u32>()
255+
self.data.len()
256+
+ self.restart_points.capacity() * std::mem::size_of::<u32>()
257+
+ std::mem::size_of::<u32>()
258+
}
259+
260+
pub fn table_id(&self) -> TableId {
261+
self.table_id
247262
}
248263

249264
/// Gets restart point by index.
@@ -385,6 +400,7 @@ pub struct BlockBuilder {
385400
/// Compression algorithm.
386401
compression_algorithm: CompressionAlgorithm,
387402

403+
table_id: Option<u32>,
388404
// restart_points_type_index stores only the restart_point corresponding to each type change,
389405
// as an index, in order to reduce space usage
390406
restart_points_type_index: Vec<RestartPoint>,
@@ -402,6 +418,7 @@ impl BlockBuilder {
402418
last_key: vec![],
403419
entry_count: 0,
404420
compression_algorithm: options.compression_algorithm,
421+
table_id: None,
405422
restart_points_type_index: Vec::default(),
406423
}
407424
}
@@ -420,15 +437,20 @@ impl BlockBuilder {
420437
///
421438
/// Panic if key is not added in ASCEND order.
422439
pub fn add(&mut self, full_key: FullKey<&[u8]>, value: &[u8]) {
440+
let input_table_id = full_key.user_key.table_id.table_id();
441+
match self.table_id {
442+
Some(current_table_id) => debug_assert_eq!(current_table_id, input_table_id),
443+
None => self.table_id = Some(input_table_id),
444+
}
423445
#[cfg(debug_assertions)]
424446
self.debug_valid();
425447

426448
let mut key: BytesMut = Default::default();
427-
full_key.encode_into(&mut key);
449+
full_key.encode_into_without_table_id(&mut key);
428450
if self.entry_count > 0 {
429451
debug_assert!(!key.is_empty());
430452
debug_assert_eq!(
431-
KeyComparator::compare_encoded_full_key(&self.last_key[..], &key),
453+
KeyComparator::compare_encoded_full_key(&self.last_key[..], &key[..]),
432454
Ordering::Less
433455
);
434456
}
@@ -462,7 +484,7 @@ impl BlockBuilder {
462484

463485
key.as_ref()
464486
} else {
465-
bytes_diff_below_max_key_length(&self.last_key, &key)
487+
bytes_diff_below_max_key_length(&self.last_key, &key[..])
466488
};
467489

468490
let prefix = KeyPrefix::new_without_len(
@@ -492,6 +514,7 @@ impl BlockBuilder {
492514
pub fn clear(&mut self) {
493515
self.buf.clear();
494516
self.restart_points.clear();
517+
self.table_id = None;
495518
self.restart_points_type_index.clear();
496519
self.last_key.clear();
497520
self.entry_count = 0;
@@ -504,6 +527,7 @@ impl BlockBuilder {
504527
+ (RestartPoint::size_of()) // (offset + len_type(u8)) * len
505528
* self.restart_points_type_index.len()
506529
+ std::mem::size_of::<u32>() // restart_points_type_index len
530+
+ std::mem::size_of::<u32>() // table_id len
507531
}
508532

509533
/// Finishes building block.
@@ -545,6 +569,7 @@ impl BlockBuilder {
545569
self.buf
546570
.put_u32_le(self.restart_points_type_index.len() as u32);
547571

572+
self.buf.put_u32_le(self.table_id.unwrap());
548573
match self.compression_algorithm {
549574
CompressionAlgorithm::None => (),
550575
CompressionAlgorithm::Lz4 => {
@@ -581,6 +606,7 @@ impl BlockBuilder {
581606
self.compression_algorithm.encode(&mut self.buf);
582607
let checksum = xxhash64_checksum(&self.buf);
583608
self.buf.put_u64_le(checksum);
609+
584610
self.buf.as_ref()
585611
}
586612

@@ -595,6 +621,7 @@ impl BlockBuilder {
595621
+ std::mem::size_of::<u32>() // restart_points_type_indics.len
596622
+ std::mem::size_of::<CompressionAlgorithm>() // compression_algorithm
597623
+ std::mem::size_of::<u64>() // checksum
624+
+ std::mem::size_of::<u32>() // table_id
598625
}
599626

600627
pub fn debug_valid(&self) {

src/storage/src/hummock/sstable/block_iterator.rs

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ use std::ops::Range;
1717

1818
use bytes::BytesMut;
1919
use risingwave_hummock_sdk::key::FullKey;
20-
use risingwave_hummock_sdk::KeyComparator;
2120

2221
use super::{KeyPrefix, LenType, RestartPoint};
2322
use crate::hummock::BlockHolder;
@@ -77,7 +76,8 @@ impl BlockIterator {
7776

7877
pub fn key(&self) -> FullKey<&[u8]> {
7978
assert!(self.is_valid());
80-
FullKey::decode(&self.key)
79+
80+
FullKey::from_slice_without_table_id(self.block.table_id(), &self.key[..])
8181
}
8282

8383
pub fn value(&self) -> &[u8] {
@@ -99,19 +99,19 @@ impl BlockIterator {
9999
}
100100

101101
pub fn seek(&mut self, key: FullKey<&[u8]>) {
102-
let full_key_encoded = key.encode();
103-
self.seek_restart_point_by_key(&full_key_encoded);
104-
self.next_until_key(&full_key_encoded);
102+
self.seek_restart_point_by_key(key);
103+
104+
self.next_until_key(key);
105105
}
106106

107107
pub fn seek_le(&mut self, key: FullKey<&[u8]>) {
108-
let full_key_encoded = key.encode();
109-
self.seek_restart_point_by_key(&full_key_encoded);
110-
self.next_until_key(&full_key_encoded);
108+
self.seek_restart_point_by_key(key);
109+
110+
self.next_until_key(key);
111111
if !self.is_valid() {
112112
self.seek_to_last();
113113
}
114-
self.prev_until_key(&full_key_encoded);
114+
self.prev_until_key(key);
115115
}
116116
}
117117

@@ -171,19 +171,15 @@ impl BlockIterator {
171171
}
172172

173173
/// Moves forward until reaching the first key that is equal to or larger than the given `key`.
174-
fn next_until_key(&mut self, key: &[u8]) {
175-
while self.is_valid()
176-
&& KeyComparator::compare_encoded_full_key(&self.key[..], key) == Ordering::Less
177-
{
174+
fn next_until_key(&mut self, key: FullKey<&[u8]>) {
175+
while self.is_valid() && self.key().cmp(&key) == Ordering::Less {
178176
self.next_inner();
179177
}
180178
}
181179

182180
/// Moves backward until reaching the first key that is equal to or smaller than the given `key`.
183-
fn prev_until_key(&mut self, key: &[u8]) {
184-
while self.is_valid()
185-
&& KeyComparator::compare_encoded_full_key(&self.key[..], key) == Ordering::Greater
186-
{
181+
fn prev_until_key(&mut self, key: FullKey<&[u8]>) {
182+
while self.is_valid() && self.key().cmp(&key) == Ordering::Greater {
187183
self.prev_inner();
188184
}
189185
}
@@ -240,7 +236,7 @@ impl BlockIterator {
240236
}
241237

242238
/// Searches the restart point index that the given `key` belongs to.
243-
fn search_restart_point_index_by_key(&self, key: &[u8]) -> usize {
239+
fn search_restart_point_index_by_key(&self, key: FullKey<&[u8]>) -> usize {
244240
// Find the largest restart point whose restart key is less than or equal to the given key.
245241
self.block
246242
.search_restart_partition_point(
@@ -252,7 +248,9 @@ impl BlockIterator {
252248
let prefix =
253249
self.decode_prefix_at(probe as usize, key_len_type, value_len_type);
254250
let probe_key = &self.block.data()[prefix.diff_key_range()];
255-
match KeyComparator::compare_encoded_full_key(probe_key, key) {
251+
let full_probe_key =
252+
FullKey::from_slice_without_table_id(self.block.table_id(), probe_key);
253+
match full_probe_key.cmp(&key) {
256254
Ordering::Less | Ordering::Equal => true,
257255
Ordering::Greater => false,
258256
}
@@ -262,7 +260,7 @@ impl BlockIterator {
262260
}
263261

264262
/// Seeks to the restart point that the given `key` belongs to.
265-
fn seek_restart_point_by_key(&mut self, key: &[u8]) {
263+
fn seek_restart_point_by_key(&mut self, key: FullKey<&[u8]>) {
266264
let index = self.search_restart_point_index_by_key(key);
267265
self.seek_restart_point_by_index(index)
268266
}

0 commit comments

Comments
 (0)