Commit ad6877e

MauriceVanVeen authored and derekcollison committed
[FIXED] Don't InstallSnapshot during shutdown, would race with monitorStream/monitorConsumer (#6153)
When stopping a stream or consumer, we would attempt to install a snapshot. However, this raced with what `monitorStream`/`monitorConsumer` was doing at that time. For example:

1. In `applyStreamEntries` we call into `mset.processJetStreamMsg` to persist one or more messages.
2. We call `mset.stop(..)` either before or during the above.
3. In `mset.stop(..)` we'd wait for `mset.processJetStreamMsg` to release the lock so we can enter `mset.stateSnapshotLocked()`. **We create a snapshot with new state here!**
4. Now we call into `InstallSnapshot` to persist the above snapshot, but `n.applied` does not yet contain the right value; it will be lower.
5. Then `applyStreamEntries` finishes and we end with calling `n.Applied(..)`.

Whether this misbehaves depends on whether step 4 happens before or after step 5. It's essential that the snapshot we install is aligned with the `n.applied` value. If it isn't, we'll replay entries and need to increase `mset.clfs`, which snowballs into stream desync due to this shift.

The only place where we can guarantee that the snapshot and applied are aligned is in `doSnapshot` of `monitorStream` and `monitorConsumer` (and `monitorCluster`), so we must not attempt to install snapshots outside of those.

Signed-off-by: Maurice van Veen <[email protected]>
1 parent f6afc3e commit ad6877e
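
For context, the pattern the fix relies on looks roughly like this (a simplified sketch of `doSnapshot` inside `monitorStream`; the identifiers belong to the surrounding monitor goroutine, and bookkeeping such as error handling around `lastSnapTime` is abbreviated):

	doSnapshot := func() {
		// Never snapshot while still recovering/restoring, or too soon after the last one.
		if mset == nil || isRecovering || isRestore || time.Since(lastSnapTime) < minSnapDelta {
			return
		}
		// This closure runs in the same goroutine that calls n.Applied(...) after
		// applyStreamEntries, so the snapshotted state cannot run ahead of applied.
		if err := n.InstallSnapshot(mset.stateSnapshotLocked()); err == nil {
			lastSnapTime = time.Now()
		}
	}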

4 files changed: +185 −9 lines changed

server/consumer.go

Lines changed: 0 additions & 6 deletions
@@ -5245,12 +5245,6 @@ func (o *consumer) stopWithFlags(dflag, sdflag, doSignal, advisory bool) error {
 		if dflag {
 			n.Delete()
 		} else {
-			// Try to install snapshot on clean exit
-			if o.store != nil && (o.retention != LimitsPolicy || n.NeedSnapshot()) {
-				if snap, err := o.store.EncodedState(); err == nil {
-					n.InstallSnapshot(snap)
-				}
-			}
 			n.Stop()
 		}
 	}

server/jetstream_cluster.go

Lines changed: 0 additions & 1 deletion
@@ -2405,7 +2405,6 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps
 	// fully recovered from disk.
 	isRecovering := true

-	// Should only to be called from leader.
 	doSnapshot := func() {
 		if mset == nil || isRecovering || isRestore || time.Since(lastSnapTime) < minSnapDelta {
 			return

server/jetstream_cluster_4_test.go

Lines changed: 185 additions & 0 deletions
@@ -4118,3 +4118,188 @@ func TestJetStreamClusterConsumerDontSendSnapshotOnLeaderChange(t *testing.T) {
 		}
 	}
 }
+
+func TestJetStreamClusterDontInstallSnapshotWhenStoppingStream(t *testing.T) {
+	c := createJetStreamClusterExplicit(t, "R3S", 3)
+	defer c.shutdown()
+
+	nc, js := jsClientConnect(t, c.randomServer())
+	defer nc.Close()
+
+	_, err := js.AddStream(&nats.StreamConfig{
+		Name:      "TEST",
+		Subjects:  []string{"foo"},
+		Retention: nats.WorkQueuePolicy,
+		Replicas:  3,
+	})
+	require_NoError(t, err)
+
+	_, err = js.Publish("foo", nil)
+	require_NoError(t, err)
+
+	// Wait for all servers to have applied everything.
+	var maxApplied uint64
+	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
+		maxApplied = 0
+		for _, s := range c.servers {
+			acc, err := s.lookupAccount(globalAccountName)
+			if err != nil {
+				return err
+			}
+			mset, err := acc.lookupStream("TEST")
+			if err != nil {
+				return err
+			}
+			_, _, applied := mset.node.Progress()
+			if maxApplied == 0 {
+				maxApplied = applied
+			} else if applied < maxApplied {
+				return fmt.Errorf("applied not high enough, expected %d, got %d", applied, maxApplied)
+			} else if applied > maxApplied {
+				return fmt.Errorf("applied higher on one server, expected %d, got %d", applied, maxApplied)
+			}
+		}
+		return nil
+	})
+
+	// Install a snapshot on a follower.
+	s := c.randomNonStreamLeader(globalAccountName, "TEST")
+	acc, err := s.lookupAccount(globalAccountName)
+	require_NoError(t, err)
+	mset, err := acc.lookupStream("TEST")
+	require_NoError(t, err)
+	err = mset.node.InstallSnapshot(mset.stateSnapshotLocked())
+	require_NoError(t, err)
+
+	// Validate the snapshot reflects applied.
+	validateStreamState := func(snap *snapshot) {
+		t.Helper()
+		require_Equal(t, snap.lastIndex, maxApplied)
+		ss, err := DecodeStreamState(snap.data)
+		require_NoError(t, err)
+		require_Equal(t, ss.FirstSeq, 1)
+		require_Equal(t, ss.LastSeq, 1)
+	}
+	snap, err := mset.node.(*raft).loadLastSnapshot()
+	require_NoError(t, err)
+	validateStreamState(snap)
+
+	// Simulate a message being stored, but not calling Applied yet.
+	err = mset.processJetStreamMsg("foo", _EMPTY_, nil, nil, 1, time.Now().UnixNano(), nil)
+	require_NoError(t, err)
+
+	// Simulate the stream being stopped before we're able to call Applied.
+	// If we'd install a snapshot during this, which would be a race condition,
+	// we'd store a snapshot with state that's ahead of applied.
+	err = mset.stop(false, false)
+	require_NoError(t, err)
+
+	// Validate the snapshot is the same as before.
+	snap, err = mset.node.(*raft).loadLastSnapshot()
+	require_NoError(t, err)
+	validateStreamState(snap)
+}
+
+func TestJetStreamClusterDontInstallSnapshotWhenStoppingConsumer(t *testing.T) {
+	c := createJetStreamClusterExplicit(t, "R3S", 3)
+	defer c.shutdown()
+
+	nc, js := jsClientConnect(t, c.randomServer())
+	defer nc.Close()
+
+	_, err := js.AddStream(&nats.StreamConfig{
+		Name:      "TEST",
+		Subjects:  []string{"foo"},
+		Retention: nats.WorkQueuePolicy,
+		Replicas:  3,
+	})
+	require_NoError(t, err)
+
+	_, err = js.AddConsumer("TEST", &nats.ConsumerConfig{
+		Durable:   "CONSUMER",
+		Replicas:  3,
+		AckPolicy: nats.AckExplicitPolicy,
+	})
+	require_NoError(t, err)
+
+	// Add a message and let the consumer ack it, this moves the consumer's RAFT applied up.
+	_, err = js.Publish("foo", nil)
+	require_NoError(t, err)
+	sub, err := js.PullSubscribe("foo", "CONSUMER")
+	require_NoError(t, err)
+	msgs, err := sub.Fetch(1)
+	require_NoError(t, err)
+	require_Len(t, len(msgs), 1)
+	err = msgs[0].AckSync()
+	require_NoError(t, err)
+
+	// Wait for all servers to have applied everything.
+	var maxApplied uint64
+	checkFor(t, 5*time.Second, 100*time.Millisecond, func() error {
+		maxApplied = 0
+		for _, s := range c.servers {
+			acc, err := s.lookupAccount(globalAccountName)
+			if err != nil {
+				return err
+			}
+			mset, err := acc.lookupStream("TEST")
+			if err != nil {
+				return err
+			}
+			o := mset.lookupConsumer("CONSUMER")
+			if o == nil {
+				return errors.New("consumer not found")
+			}
+			_, _, applied := o.node.Progress()
+			if maxApplied == 0 {
+				maxApplied = applied
+			} else if applied < maxApplied {
+				return fmt.Errorf("applied not high enough, expected %d, got %d", applied, maxApplied)
+			} else if applied > maxApplied {
+				return fmt.Errorf("applied higher on one server, expected %d, got %d", applied, maxApplied)
+			}
+		}
+		return nil
+	})
+
+	// Install a snapshot on a follower.
+	s := c.randomNonStreamLeader(globalAccountName, "TEST")
+	acc, err := s.lookupAccount(globalAccountName)
+	require_NoError(t, err)
+	mset, err := acc.lookupStream("TEST")
+	require_NoError(t, err)
+	o := mset.lookupConsumer("CONSUMER")
+	require_NotNil(t, o)
+	snapBytes, err := o.store.EncodedState()
+	require_NoError(t, err)
+	err = o.node.InstallSnapshot(snapBytes)
+	require_NoError(t, err)
+
+	// Validate the snapshot reflects applied.
+	validateStreamState := func(snap *snapshot) {
+		t.Helper()
+		require_Equal(t, snap.lastIndex, maxApplied)
+		state, err := decodeConsumerState(snap.data)
+		require_NoError(t, err)
+		require_Equal(t, state.Delivered.Consumer, 1)
+		require_Equal(t, state.Delivered.Stream, 1)
+	}
+	snap, err := o.node.(*raft).loadLastSnapshot()
+	require_NoError(t, err)
+	validateStreamState(snap)
+
+	// Simulate a message being delivered, but not calling Applied yet.
+	err = o.store.UpdateDelivered(2, 2, 1, time.Now().UnixNano())
+	require_NoError(t, err)
+
+	// Simulate the consumer being stopped before we're able to call Applied.
+	// If we'd install a snapshot during this, which would be a race condition,
+	// we'd store a snapshot with state that's ahead of applied.
+	err = o.stop()
+	require_NoError(t, err)
+
+	// Validate the snapshot is the same as before.
+	snap, err = o.node.(*raft).loadLastSnapshot()
+	require_NoError(t, err)
+	validateStreamState(snap)
+}

server/stream.go

Lines changed: 0 additions & 2 deletions
@@ -5183,8 +5183,6 @@ func (mset *stream) stop(deleteFlag, advisory bool) error {
 			n.Delete()
 			sa = mset.sa
 		} else {
-			// Always attempt snapshot on clean exit.
-			n.InstallSnapshot(mset.stateSnapshotLocked())
 			n.Stop()
 		}
 	}
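
To exercise just the new tests locally, a standard `go test` invocation along these lines should work (the run regex and flags are illustrative, not part of this change):

	go test ./server -run 'TestJetStreamClusterDontInstallSnapshotWhenStopping(Stream|Consumer)' -count=1 -v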
