Skip to content

Commit 9f44086

Browse files
committed
Fix deletion to use correct user ID listing and add unit test.
Signed-off-by: Steve Simpson <[email protected]>
1 parent 7304ba0 commit 9f44086

File tree

2 files changed

+42
-26
lines changed

2 files changed

+42
-26
lines changed

pkg/alertmanager/multitenant.go

+8-9
Original file line numberDiff line numberDiff line change
@@ -704,22 +704,21 @@ func (am *MultitenantAlertmanager) stopping(_ error) error {
704704
}
705705

706706
// loadAlertmanagerConfigs Loads (and filters) the alertmanagers configuration from object storage, taking into consideration the sharding strategy.
707+
// Returns the list of all users with a configuration, and the set of users which should be configured in this instance.
707708
func (am *MultitenantAlertmanager) loadAlertmanagerConfigs(ctx context.Context) ([]string, map[string]alertspb.AlertConfigDesc, error) {
708709
// Find all users with an alertmanager config.
709-
userIDs, err := am.store.ListAllUsers(ctx)
710+
allUserIDs, err := am.store.ListAllUsers(ctx)
710711
if err != nil {
711712
return nil, nil, errors.Wrap(err, "failed to list users with alertmanager configuration")
712713
}
713-
numUsersDiscovered := len(userIDs)
714+
numUsersDiscovered := len(allUserIDs)
715+
userIDs := make([]string, 0, len(allUserIDs))
714716

715717
// Filter out users not owned by this shard.
716-
for i := 0; i < len(userIDs); {
717-
if !am.isUserOwned(userIDs[i]) {
718-
userIDs = append(userIDs[:i], userIDs[i+1:]...)
719-
continue
718+
for _, userID := range allUserIDs {
719+
if am.isUserOwned(userID) {
720+
userIDs = append(userIDs, userID)
720721
}
721-
722-
i++
723722
}
724723
numUsersOwned := len(userIDs)
725724

@@ -731,7 +730,7 @@ func (am *MultitenantAlertmanager) loadAlertmanagerConfigs(ctx context.Context)
731730

732731
am.tenantsDiscovered.Set(float64(numUsersDiscovered))
733732
am.tenantsOwned.Set(float64(numUsersOwned))
734-
return userIDs, configs, nil
733+
return allUserIDs, configs, nil
735734
}
736735

737736
func (am *MultitenantAlertmanager) isUserOwned(userID string) bool {

pkg/alertmanager/multitenant_test.go

+34-17
Original file line numberDiff line numberDiff line change
@@ -581,23 +581,34 @@ func TestMultitenantAlertmanager_deleteUnusedRemoteUserState(t *testing.T) {
581581

582582
alertStore := prepareInMemoryAlertStore()
583583
ringStore := consul.NewInMemoryClient(ring.GetCodec())
584-
reg := prometheus.NewPedanticRegistry()
585-
cfg := mockAlertmanagerConfig(t)
586584

587-
cfg.ShardingRing.ReplicationFactor = 1
588-
cfg.ShardingRing.InstanceID = "instance"
589-
cfg.ShardingRing.InstanceAddr = "127.0.0.1"
590-
cfg.ShardingEnabled = true
585+
createInstance := func(i int) *MultitenantAlertmanager {
586+
reg := prometheus.NewPedanticRegistry()
587+
cfg := mockAlertmanagerConfig(t)
591588

592-
// Increase state write interval so that state gets written sooner, making test faster.
593-
cfg.Persister.Interval = 500 * time.Millisecond
589+
cfg.ShardingRing.ReplicationFactor = 1
590+
cfg.ShardingRing.InstanceID = fmt.Sprintf("instance-%d", i)
591+
cfg.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.1-%d", i)
592+
cfg.ShardingEnabled = true
594593

595-
am, err := createMultitenantAlertmanager(cfg, nil, nil, alertStore, ringStore, nil, log.NewNopLogger(), reg)
596-
require.NoError(t, err)
597-
t.Cleanup(func() {
598-
require.NoError(t, services.StopAndAwaitTerminated(ctx, am))
599-
})
600-
require.NoError(t, services.StartAndAwaitRunning(ctx, am))
594+
// Increase state write interval so that state gets written sooner, making test faster.
595+
cfg.Persister.Interval = 500 * time.Millisecond
596+
597+
am, err := createMultitenantAlertmanager(cfg, nil, nil, alertStore, ringStore, nil, log.NewLogfmtLogger(os.Stdout), reg)
598+
require.NoError(t, err)
599+
t.Cleanup(func() {
600+
require.NoError(t, services.StopAndAwaitTerminated(ctx, am))
601+
})
602+
require.NoError(t, services.StartAndAwaitRunning(ctx, am))
603+
604+
return am
605+
}
606+
607+
// Create two instances. With replication factor of 1, this means that only one
608+
// of the instances will own the user. This tests that an instance does not delete
609+
// state for users that are configured, but are owned by other instances.
610+
am1 := createInstance(1)
611+
am2 := createInstance(2)
601612

602613
// Configure the users and wait for the state persister to write some state for both.
603614
{
@@ -612,7 +623,9 @@ func TestMultitenantAlertmanager_deleteUnusedRemoteUserState(t *testing.T) {
612623
Templates: []*alertspb.TemplateDesc{},
613624
}))
614625

615-
err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
626+
err := am1.loadAndSyncConfigs(context.Background(), reasonPeriodic)
627+
require.NoError(t, err)
628+
err = am2.loadAndSyncConfigs(context.Background(), reasonPeriodic)
616629
require.NoError(t, err)
617630

618631
require.Eventually(t, func() bool {
@@ -624,7 +637,9 @@ func TestMultitenantAlertmanager_deleteUnusedRemoteUserState(t *testing.T) {
624637

625638
// Perform another sync to trigger cleanup; this should have no effect.
626639
{
627-
err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
640+
err := am1.loadAndSyncConfigs(context.Background(), reasonPeriodic)
641+
require.NoError(t, err)
642+
err = am2.loadAndSyncConfigs(context.Background(), reasonPeriodic)
628643
require.NoError(t, err)
629644

630645
_, err = alertStore.GetFullState(context.Background(), user1)
@@ -637,7 +652,9 @@ func TestMultitenantAlertmanager_deleteUnusedRemoteUserState(t *testing.T) {
637652
{
638653
require.NoError(t, alertStore.DeleteAlertConfig(ctx, user1))
639654

640-
err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
655+
err := am1.loadAndSyncConfigs(context.Background(), reasonPeriodic)
656+
require.NoError(t, err)
657+
err = am2.loadAndSyncConfigs(context.Background(), reasonPeriodic)
641658
require.NoError(t, err)
642659

643660
_, err = alertStore.GetFullState(context.Background(), user1)

0 commit comments

Comments
 (0)