Skip to content

Commit 789ef71

Browse files
authored
receive: Fixed small options race; Removed unused StartTime feature. (#2816)
startTimeMargin and StartTime is used only by Prometheus remote read, Thanos does not use it. Fixed following race: ``` === RUN TestMultiTSDB/run_on_existing_storage ================== WARNING: DATA RACE Read at 0x00c00073ae80 by goroutine 69: github.com/prometheus/prometheus/tsdb.validateOpts() /home/bwplotka/Repos/thanosgopath/pkg/mod/github.com/prometheus/[email protected]/tsdb/db.go:510 +0x55 github.com/prometheus/prometheus/tsdb.Open() /home/bwplotka/Repos/thanosgopath/pkg/mod/github.com/prometheus/[email protected]/tsdb/db.go:502 +0x61 github.com/thanos-io/thanos/pkg/receive.(*MultiTSDB).getOrLoadTenant.func1() /home/bwplotka/Repos/thanos/pkg/receive/multitsdb.go:268 +0x56b github.com/thanos-io/thanos/pkg/receive.(*MultiTSDB).getOrLoadTenant() /home/bwplotka/Repos/thanos/pkg/receive/multitsdb.go:302 +0x4ef github.com/thanos-io/thanos/pkg/receive.(*MultiTSDB).Open.func1() /home/bwplotka/Repos/thanos/pkg/receive/multitsdb.go:142 +0x66 golang.org/x/sync/errgroup.(*Group).Go.func1() /home/bwplotka/Repos/thanosgopath/pkg/mod/golang.org/x/[email protected]/errgroup/errgroup.go:57 +0x85 Previous write at 0x00c00073ae80 by goroutine 57: github.com/prometheus/prometheus/tsdb.validateOpts() /home/bwplotka/Repos/thanosgopath/pkg/mod/github.com/prometheus/[email protected]/tsdb/db.go:511 +0x1f2 github.com/prometheus/prometheus/tsdb.Open() /home/bwplotka/Repos/thanosgopath/pkg/mod/github.com/prometheus/[email protected]/tsdb/db.go:502 +0x61 github.com/thanos-io/thanos/pkg/receive.(*MultiTSDB).getOrLoadTenant.func1() /home/bwplotka/Repos/thanos/pkg/receive/multitsdb.go:268 +0x56b github.com/thanos-io/thanos/pkg/receive.(*MultiTSDB).getOrLoadTenant() /home/bwplotka/Repos/thanos/pkg/receive/multitsdb.go:302 +0x4ef github.com/thanos-io/thanos/pkg/receive.(*MultiTSDB).Open.func1() /home/bwplotka/Repos/thanos/pkg/receive/multitsdb.go:142 +0x66 golang.org/x/sync/errgroup.(*Group).Go.func1() /home/bwplotka/Repos/thanosgopath/pkg/mod/golang.org/x/[email protected]/errgroup/errgroup.go:57 +0x85 Goroutine 69 (running) created at: golang.org/x/sync/errgroup.(*Group).Go() /home/bwplotka/Repos/thanosgopath/pkg/mod/golang.org/x/[email protected]/errgroup/errgroup.go:54 +0x73 github.com/thanos-io/thanos/pkg/receive.(*MultiTSDB).Open() /home/bwplotka/Repos/thanos/pkg/receive/multitsdb.go:141 +0x2af github.com/thanos-io/thanos/pkg/receive.TestMultiTSDB.func3() /home/bwplotka/Repos/thanos/pkg/receive/multitsdb_test.go:118 +0x6d3 testing.tRunner() /home/bwplotka/.gvm/gos/go1.14.2/src/testing/testing.go:991 +0x1eb Goroutine 57 (running) created at: golang.org/x/sync/errgroup.(*Group).Go() /home/bwplotka/Repos/thanosgopath/pkg/mod/golang.org/x/[email protected]/errgroup/errgroup.go:54 +0x73 github.com/thanos-io/thanos/pkg/receive.(*MultiTSDB).Open() /home/bwplotka/Repos/thanos/pkg/receive/multitsdb.go:141 +0x2af github.com/thanos-io/thanos/pkg/receive.TestMultiTSDB.func3() /home/bwplotka/Repos/thanos/pkg/receive/multitsdb_test.go:118 +0x6d3 testing.tRunner() /home/bwplotka/.gvm/gos/go1.14.2/src/testing/testing.go:991 +0x1eb ================== ``` Signed-off-by: Bartlomiej Plotka <[email protected]>
1 parent e24ce43 commit 789ef71

File tree

1 file changed

+58
-84
lines changed

1 file changed

+58
-84
lines changed

pkg/receive/multitsdb.go

+58-84
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,18 @@ import (
99
"os"
1010
"path"
1111
"sync"
12-
"time"
1312

1413
"github.com/go-kit/kit/log"
1514
"github.com/go-kit/kit/log/level"
1615
"github.com/pkg/errors"
1716
"github.com/prometheus/client_golang/prometheus"
18-
"github.com/prometheus/common/model"
1917
"github.com/prometheus/prometheus/pkg/labels"
2018
"github.com/prometheus/prometheus/storage"
2119
"github.com/prometheus/prometheus/tsdb"
2220
terrors "github.com/prometheus/prometheus/tsdb/errors"
2321
"github.com/thanos-io/thanos/pkg/block/metadata"
2422
"github.com/thanos-io/thanos/pkg/component"
2523
"github.com/thanos-io/thanos/pkg/objstore"
26-
"github.com/thanos-io/thanos/pkg/runutil"
2724
"github.com/thanos-io/thanos/pkg/shipper"
2825
"github.com/thanos-io/thanos/pkg/store"
2926
"golang.org/x/sync/errgroup"
@@ -72,8 +69,6 @@ func NewMultiTSDB(
7269
}
7370

7471
type tenant struct {
75-
tsdbOpts *tsdb.Options
76-
7772
readyS *ReadyStorage
7873
tsdb *tsdb.DB
7974
storeTSDB *store.TSDBStore
@@ -82,11 +77,10 @@ type tenant struct {
8277
mtx *sync.RWMutex
8378
}
8479

85-
func newTenant(tsdbOpts *tsdb.Options) *tenant {
80+
func newTenant() *tenant {
8681
return &tenant{
87-
tsdbOpts: tsdbOpts,
88-
readyS: &ReadyStorage{},
89-
mtx: &sync.RWMutex{},
82+
readyS: &ReadyStorage{},
83+
mtx: &sync.RWMutex{},
9084
}
9185
}
9286

@@ -113,7 +107,7 @@ func (t *tenant) db() *tsdb.DB {
113107
}
114108

115109
func (t *tenant) set(storeTSDB *store.TSDBStore, tenantTSDB *tsdb.DB, ship *shipper.Shipper) {
116-
t.readyS.Set(tenantTSDB, int64(2*time.Duration(t.tsdbOpts.MinBlockDuration).Seconds()*1000))
110+
t.readyS.Set(tenantTSDB)
117111
t.mtx.Lock()
118112
t.tsdb = tenantTSDB
119113
t.storeTSDB = storeTSDB
@@ -220,6 +214,47 @@ func (t *MultiTSDB) TSDBStores() map[string]*store.TSDBStore {
220214
return res
221215
}
222216

217+
func (t *MultiTSDB) startTSDB(logger log.Logger, tenantID string, tenant *tenant) error {
218+
reg := prometheus.WrapRegistererWith(prometheus.Labels{"tenant": tenantID}, t.reg)
219+
lbls := append(t.labels, labels.Label{Name: t.tenantLabelName, Value: tenantID})
220+
dataDir := path.Join(t.dataDir, tenantID)
221+
222+
opts := *t.tsdbOpts
223+
s, err := tsdb.Open(
224+
dataDir,
225+
logger,
226+
&UnRegisterer{Registerer: reg},
227+
&opts,
228+
)
229+
if err != nil {
230+
t.mtx.Lock()
231+
delete(t.tenants, tenantID)
232+
t.mtx.Unlock()
233+
return err
234+
}
235+
236+
var ship *shipper.Shipper
237+
if t.bucket != nil {
238+
ship = shipper.New(
239+
logger,
240+
reg,
241+
dataDir,
242+
t.bucket,
243+
func() labels.Labels { return lbls },
244+
metadata.ReceiveSource,
245+
t.allowOutOfOrderUpload,
246+
)
247+
}
248+
tenant.set(store.NewTSDBStore(
249+
logger,
250+
reg,
251+
s,
252+
component.Receive,
253+
lbls,
254+
), s, ship)
255+
256+
return nil
257+
}
223258
func (t *MultiTSDB) getOrLoadTenant(tenantID string, blockingStart bool) (*tenant, error) {
224259
// Fast path, as creating tenants is a very rare operation.
225260
t.mtx.RLock()
@@ -239,68 +274,20 @@ func (t *MultiTSDB) getOrLoadTenant(tenantID string, blockingStart bool) (*tenan
239274
return tenant, nil
240275
}
241276

242-
tenant = newTenant(t.tsdbOpts)
277+
tenant = newTenant()
243278
t.tenants[tenantID] = tenant
244279
t.mtx.Unlock()
245280

246-
var err error
247-
startTSDB := func() {
248-
reg := prometheus.WrapRegistererWith(prometheus.Labels{
249-
"tenant": tenantID,
250-
}, t.reg)
251-
logger := log.With(t.logger, "tenant", tenantID)
252-
lbls := append(t.labels, labels.Label{Name: t.tenantLabelName, Value: tenantID})
253-
dataDir := path.Join(t.dataDir, tenantID)
254-
255-
var ship *shipper.Shipper
256-
if t.bucket != nil {
257-
ship = shipper.New(
258-
logger,
259-
reg,
260-
dataDir,
261-
t.bucket,
262-
func() labels.Labels { return lbls },
263-
metadata.ReceiveSource,
264-
t.allowOutOfOrderUpload,
265-
)
266-
}
267-
268-
s, err := tsdb.Open(
269-
dataDir,
270-
logger,
271-
&UnRegisterer{Registerer: reg},
272-
t.tsdbOpts,
273-
)
274-
275-
// Assign to outer error to report in blocking case.
276-
if err != nil {
277-
level.Error(logger).Log("msg", "failed to open tsdb", "err", err)
278-
t.mtx.Lock()
279-
delete(t.tenants, tenantID)
280-
t.mtx.Unlock()
281-
runutil.CloseWithLogOnErr(logger, s, "failed to close tsdb")
282-
return
283-
}
284-
285-
tenant.set(
286-
store.NewTSDBStore(
287-
logger,
288-
reg,
289-
s,
290-
component.Receive,
291-
lbls,
292-
),
293-
s,
294-
ship,
295-
)
296-
}
281+
logger := log.With(t.logger, "tenant", tenantID)
297282
if !blockingStart {
298-
go startTSDB()
283+
go func() {
284+
if err := t.startTSDB(logger, tenantID, tenant); err != nil {
285+
level.Error(logger).Log("msg", "failed to start tsdb asynchronously", "err", err)
286+
}
287+
}()
299288
return tenant, nil
300289
}
301-
302-
startTSDB()
303-
return tenant, err
290+
return tenant, t.startTSDB(logger, tenantID, tenant)
304291
}
305292

306293
func (t *MultiTSDB) TenantAppendable(tenantID string) (Appendable, error) {
@@ -323,11 +310,11 @@ type ReadyStorage struct {
323310
}
324311

325312
// Set the storage.
326-
func (s *ReadyStorage) Set(db *tsdb.DB, startTimeMargin int64) {
313+
func (s *ReadyStorage) Set(db *tsdb.DB) {
327314
s.mtx.Lock()
328315
defer s.mtx.Unlock()
329316

330-
s.a = &adapter{db: db, startTimeMargin: startTimeMargin}
317+
s.a = &adapter{db: db}
331318
}
332319

333320
// Get the storage.
@@ -347,10 +334,7 @@ func (s *ReadyStorage) get() *adapter {
347334

348335
// StartTime implements the Storage interface.
349336
func (s *ReadyStorage) StartTime() (int64, error) {
350-
if x := s.get(); x != nil {
351-
return x.StartTime()
352-
}
353-
return int64(model.Latest), ErrNotReady
337+
return 0, errors.New("not implemented")
354338
}
355339

356340
// Querier implements the Storage interface.
@@ -379,22 +363,12 @@ func (s *ReadyStorage) Close() error {
379363

380364
// adapter implements a storage.Storage around TSDB.
381365
type adapter struct {
382-
db *tsdb.DB
383-
startTimeMargin int64
366+
db *tsdb.DB
384367
}
385368

386369
// StartTime implements the Storage interface.
387370
func (a adapter) StartTime() (int64, error) {
388-
var startTime int64
389-
390-
if len(a.db.Blocks()) > 0 {
391-
startTime = a.db.Blocks()[0].Meta().MinTime
392-
} else {
393-
startTime = time.Now().Unix() * 1000
394-
}
395-
396-
// Add a safety margin as it may take a few minutes for everything to spin up.
397-
return startTime + a.startTimeMargin, nil
371+
return 0, errors.New("not implemented")
398372
}
399373

400374
func (a adapter) Querier(ctx context.Context, mint, maxt int64) (storage.Querier, error) {

0 commit comments

Comments
 (0)