From 5771d2f2befddcd3a8055badc27b0244d434b58d Mon Sep 17 00:00:00 2001 From: Ken Haines Date: Fri, 3 Apr 2020 21:24:17 -0700 Subject: [PATCH 1/4] When executing updateConsul for an existing ingester, ensure to write the zone on the updated record Signed-off-by: Ken Haines --- pkg/ring/lifecycler.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/ring/lifecycler.go b/pkg/ring/lifecycler.go index 72b975bd7f1..40cddfd1941 100644 --- a/pkg/ring/lifecycler.go +++ b/pkg/ring/lifecycler.go @@ -667,6 +667,7 @@ func (i *Lifecycler) updateConsul(ctx context.Context) error { ingesterDesc.Timestamp = time.Now().Unix() ingesterDesc.State = i.GetState() ingesterDesc.Addr = i.Addr + ingesterDesc.Zone = i.Zone ringDesc.Ingesters[i.ID] = ingesterDesc } From df2fc9b678c839b7d59b2fed304bec10b9d6a48a Mon Sep 17 00:00:00 2001 From: Ken Haines Date: Fri, 3 Apr 2020 22:10:34 -0700 Subject: [PATCH 2/4] adding unit test checking that the zone value is updated when an external actor clears it Signed-off-by: Ken Haines --- pkg/ring/lifecycler_test.go | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/pkg/ring/lifecycler_test.go b/pkg/ring/lifecycler_test.go index faee4c9bf4c..182fd54173e 100644 --- a/pkg/ring/lifecycler_test.go +++ b/pkg/ring/lifecycler_test.go @@ -37,6 +37,7 @@ func testLifecyclerConfig(ringConfig Config, id string) LifecyclerConfig { lifecyclerConfig.RingConfig = ringConfig lifecyclerConfig.NumTokens = 1 lifecyclerConfig.ID = id + lifecyclerConfig.Zone = "zone1" lifecyclerConfig.FinalSleep = 0 lifecyclerConfig.HeartbeatPeriod = 100 * time.Millisecond @@ -390,3 +391,57 @@ func TestJoinInLeavingState(t *testing.T) { len(desc.Ingesters["ing2"].Tokens) == 2 }) } + +func TestRestoreOfZoneWhenOverwritten(t *testing.T) { + // This test is simulating a case during upgrade of pre 1.0 cortex where + // older ingesters do not have the zone field in their ring structs + // so it gets removed. The current version of the lifecylcer should + // write it back on update during its next heartbeat. + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + codec := GetCodec() + ringConfig.KVStore.Mock = consul.NewInMemoryClient(codec) + + r, err := New(ringConfig, "ingester", IngesterRingKey) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), r)) + defer services.StopAndAwaitTerminated(context.Background(), r) //nolint:errcheck + + cfg := testLifecyclerConfig(ringConfig, "ing1") + + // Set ing1 to not have a zone + err = r.KVClient.CAS(context.Background(), IngesterRingKey, func(in interface{}) (interface{}, bool, error) { + r := &Desc{ + Ingesters: map[string]IngesterDesc{ + "ing1": { + State: ACTIVE, + Addr: "0.0.0.0", + Tokens: []uint32{1, 4}, + }, + "ing2": { + Tokens: []uint32{2, 3}, + }, + }, + } + + return r, true, nil + }) + require.NoError(t, err) + + l1, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ingester", IngesterRingKey, true) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), l1)) + + // Check that the lifecycler was able to reset the zone value to the expected setting + test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + d, err := r.KVClient.Get(context.Background(), IngesterRingKey) + require.NoError(t, err) + desc, ok := d.(*Desc) + return ok && + len(desc.Ingesters) == 2 && + desc.Ingesters["ing1"].Zone == l1.Zone && + desc.Ingesters["ing2"].Zone == "" + + }) +} From ccea87e34c9eda72f6c9b65a400d7c262a0ee167 Mon Sep 17 00:00:00 2001 From: Ken Haines Date: Fri, 3 Apr 2020 22:38:38 -0700 Subject: [PATCH 3/4] updating CHANGELOG.md Signed-off-by: Ken Haines --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c89ac904fd..44dd4bef27e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ * [BUGFIX] Experimental TSDB: fixed chunk data corruption when querying back series using the experimental blocks storage. #2400 * [BUGFIX] Cassandra Storage: Fix endpoint TLS host verification. #2109 * [BUGFIX] Experimental TSDB: fixed response status code from `422` to `500` when an error occurs while iterating chunks with the experimental blocks storage. #2402 +* [BUGFIX] Ring: Fixed a situation where upgrading from pre-1.0 cortex with a rolling strategy caused new 1.0 ingesters to lose their zone value in the ring until manually forced to re-register. #2404 ## 1.0.0 / 2020-04-02 From 7c2b451e6d53a9de35059e9504773ffe0de99acf Mon Sep 17 00:00:00 2001 From: Ken Haines Date: Mon, 6 Apr 2020 07:32:47 -0700 Subject: [PATCH 4/4] removing internal default 'no-zone' value of lifecyler.id and leaving a blank string Signed-off-by: Ken Haines --- pkg/ring/lifecycler.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/ring/lifecycler.go b/pkg/ring/lifecycler.go index 40cddfd1941..85b14bd9c9d 100644 --- a/pkg/ring/lifecycler.go +++ b/pkg/ring/lifecycler.go @@ -162,10 +162,6 @@ func NewLifecycler(cfg LifecyclerConfig, flushTransferer FlushTransferer, ringNa util.WarnExperimentalUse("Zone aware replication") } - if zone == "" { - zone = cfg.ID - } - // We do allow a nil FlushTransferer, but to keep the ring logic easier we assume // it's always set, so we use a noop FlushTransferer if flushTransferer == nil {