Skip to content

Commit 8c5489e

Browse files
authored
fix(meta): correctly resolve update of vnode mapping after scaling (#8652)
Signed-off-by: Bugen Zhao <[email protected]>
1 parent cc6e687 commit 8c5489e

File tree

4 files changed

+47
-33
lines changed

4 files changed

+47
-33
lines changed

src/common/src/hash/consistent_hash/mapping.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -268,10 +268,10 @@ pub type ExpandedParallelUnitMapping = ExpandedMapping<marker::ParallelUnit>;
268268

269269
impl ActorMapping {
270270
/// Transform this actor mapping to a parallel unit mapping, essentially `transform`.
271-
pub fn to_parallel_unit(
272-
&self,
273-
to_map: &HashMap<ActorId, ParallelUnitId>,
274-
) -> ParallelUnitMapping {
271+
pub fn to_parallel_unit<M>(&self, to_map: &M) -> ParallelUnitMapping
272+
where
273+
M: for<'a> Index<&'a ActorId, Output = ParallelUnitId>,
274+
{
275275
self.transform(to_map)
276276
}
277277

src/meta/src/barrier/command.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ pub struct Reschedule {
5858
/// The upstream fragments of this fragment, and the dispatchers that should be updated.
5959
pub upstream_fragment_dispatcher_ids: Vec<(FragmentId, DispatcherId)>,
6060
/// New hash mapping of the upstream dispatcher to be updated.
61+
///
62+
/// This field exists only when there's upstream fragment and the current fragment is
63+
/// hash-sharded.
6164
pub upstream_dispatcher_mapping: Option<ActorMapping>,
6265

6366
/// The downstream fragments of this fragment.

src/meta/src/manager/catalog/fragment.rs

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ use std::sync::Arc;
1818

1919
use anyhow::{anyhow, Context};
2020
use itertools::Itertools;
21+
use risingwave_common::buffer::Bitmap;
2122
use risingwave_common::catalog::TableId;
22-
use risingwave_common::hash::ParallelUnitId;
23+
use risingwave_common::hash::{ActorMapping, ParallelUnitMapping};
2324
use risingwave_common::{bail, try_match_expand};
2425
use risingwave_connector::source::SplitImpl;
2526
use risingwave_pb::common::{ParallelUnit, WorkerNode};
@@ -736,44 +737,54 @@ where
736737
let actor_status = table_fragment.actor_status.clone();
737738
let fragment = table_fragment.fragments.get_mut(&fragment_id).unwrap();
738739

740+
fragment
741+
.actors
742+
.retain(|a| !removed_actor_ids.contains(&a.actor_id));
743+
739744
// update vnode mapping for actors.
740745
for actor in &mut fragment.actors {
741746
if let Some(bitmap) = vnode_bitmap_updates.get(&actor.actor_id) {
742747
actor.vnode_bitmap = Some(bitmap.to_protobuf());
743748
}
744749
}
745750

746-
fragment
747-
.actors
748-
.retain(|a| !removed_actor_ids.contains(&a.actor_id));
749-
750751
// update fragment's vnode mapping
751-
if let Some(vnode_mapping) = fragment.vnode_mapping.as_mut() {
752-
let mut actor_to_parallel_unit = HashMap::with_capacity(fragment.actors.len());
753-
for actor in &fragment.actors {
754-
if let Some(actor_status) = actor_status.get(&actor.actor_id) {
755-
if let Some(parallel_unit) = actor_status.parallel_unit.as_ref() {
756-
actor_to_parallel_unit.insert(
757-
actor.actor_id as ActorId,
758-
parallel_unit.id as ParallelUnitId,
759-
);
760-
}
761-
}
752+
let mut actor_to_parallel_unit = HashMap::with_capacity(fragment.actors.len());
753+
let mut actor_to_vnode_bitmap = HashMap::with_capacity(fragment.actors.len());
754+
for actor in &fragment.actors {
755+
let actor_status = &actor_status[&actor.actor_id];
756+
let parallel_unit_id = actor_status.parallel_unit.as_ref().unwrap().id;
757+
actor_to_parallel_unit.insert(actor.actor_id, parallel_unit_id);
758+
759+
if let Some(vnode_bitmap) = &actor.vnode_bitmap {
760+
let bitmap = Bitmap::from(vnode_bitmap);
761+
actor_to_vnode_bitmap.insert(actor.actor_id, bitmap);
762762
}
763+
}
763764

764-
if let Some(actor_mapping) = upstream_dispatcher_mapping.as_ref() {
765-
*vnode_mapping = actor_mapping
766-
.to_parallel_unit(&actor_to_parallel_unit)
767-
.to_protobuf();
768-
}
765+
let vnode_mapping = if actor_to_vnode_bitmap.is_empty() {
766+
// If there's no `vnode_bitmap`, then the fragment must be a singleton fragment.
767+
// We directly use the single parallel unit to construct the mapping.
768+
// TODO: also fill `vnode_bitmap` for the actor of singleton fragment so that we
769+
// don't need this branch.
770+
let parallel_unit = *actor_to_parallel_unit.values().exactly_one().unwrap();
771+
ParallelUnitMapping::new_single(parallel_unit)
772+
} else {
773+
// Generate the parallel unit mapping from the fragment's actor bitmaps.
774+
assert_eq!(actor_to_vnode_bitmap.len(), actor_to_parallel_unit.len());
775+
ActorMapping::from_bitmaps(&actor_to_vnode_bitmap)
776+
.to_parallel_unit(&actor_to_parallel_unit)
777+
}
778+
.to_protobuf();
769779

770-
if !fragment.state_table_ids.is_empty() {
771-
let fragment_mapping = FragmentParallelUnitMapping {
772-
fragment_id: fragment_id as FragmentId,
773-
mapping: Some(vnode_mapping.clone()),
774-
};
775-
fragment_mapping_to_notify.push(fragment_mapping);
776-
}
780+
*fragment.vnode_mapping.as_mut().unwrap() = vnode_mapping.clone();
781+
782+
if !fragment.state_table_ids.is_empty() {
783+
let fragment_mapping = FragmentParallelUnitMapping {
784+
fragment_id: fragment_id as FragmentId,
785+
mapping: Some(vnode_mapping),
786+
};
787+
fragment_mapping_to_notify.push(fragment_mapping);
777788
}
778789

779790
// Second step, update upstream fragments

src/meta/src/stream/test_scale.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ mod tests {
143143
for parallel_unit_num in simulated_parallel_unit_nums(None, None) {
144144
let (actor_mapping, _) = generate_actor_mapping(parallel_unit_num);
145145

146-
let actor_to_parallel_unit_map = (0..parallel_unit_num)
146+
let actor_to_parallel_unit_map: HashMap<_, _> = (0..parallel_unit_num)
147147
.map(|i| (i as ActorId, i as ParallelUnitId))
148148
.collect();
149149
let parallel_unit_mapping = actor_mapping.to_parallel_unit(&actor_to_parallel_unit_map);

0 commit comments

Comments
 (0)