cortexproject · pracucci · Jun 26, 2020 · Jun 26, 2020 · Jun 26, 2020 · Jun 26, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,10 @@
 ## master / unreleased
 
 * [CHANGE] Experimental Delete Series: Change target flag for purger from `data-purger` to `purger`. #2777
+* [CHANGE] Experimental TSDB: The max concurrent queries against the long-term storage, configured via `-experimental.tsdb.bucket-store.max-concurrent`, is now a limit shared across all tenants and not a per-tenant limit anymore. The default value has changed from `20` to `100` and the following new metrics have been added: #2797
+  * `cortex_bucket_stores_gate_queries_concurrent_max`
+  * `cortex_bucket_stores_gate_queries_in_flight`
+  * `cortex_bucket_stores_gate_duration_seconds`
 * [FEATURE] Introduced `ruler.for-outage-tolerance`, Max time to tolerate outage for restoring "for" state of alert. #2783
 * [FEATURE] Introduced `ruler.for-grace-period`, Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. #2783
 * [FEATURE] Introduced `ruler.for-resend-delay`, Minimum amount of time to wait before resending an alert to Alertmanager. #2783

diff --git a/go.mod b/go.mod
@@ -50,7 +50,7 @@ require (
 	github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e
 	github.com/spf13/afero v1.2.2
 	github.com/stretchr/testify v1.5.1
-	github.com/thanos-io/thanos v0.12.3-0.20200618165043-6c513e5f5c5f
+	github.com/thanos-io/thanos v0.13.1-0.20200625180332-f078faed1b96
 	github.com/uber/jaeger-client-go v2.23.1+incompatible
 	github.com/weaveworks/common v0.0.0-20200512154658-384f10054ec5
 	go.etcd.io/bbolt v1.3.5-0.20200615073812-232d8fc87f50

diff --git a/go.sum b/go.sum
@@ -960,7 +960,7 @@ github.com/prometheus/prometheus v0.0.0-20180315085919-58e2a31db8de/go.mod h1:oA
 github.com/prometheus/prometheus v0.0.0-20190818123050-43acd0e2e93f/go.mod h1:rMTlmxGCvukf2KMu3fClMDKLLoJ5hl61MhcJ7xKakf0=
 github.com/prometheus/prometheus v1.8.2-0.20200107122003-4708915ac6ef/go.mod h1:7U90zPoLkWjEIQcy/rweQla82OCTUzxVHE51G3OhJbI=
 github.com/prometheus/prometheus v1.8.2-0.20200213233353-b90be6f32a33/go.mod h1:fkIPPkuZnkXyopYHmXPxf9rgiPkVgZCN8w9o8+UgBlY=
-github.com/prometheus/prometheus v1.8.2-0.20200609165731-66dfb951c4ca/go.mod h1:CwaXafRa0mm72de2GQWtfQxjGytbSKIGivWxQvjpRZs=
+github.com/prometheus/prometheus v1.8.2-0.20200619100132-74207c04655e/go.mod h1:QV6T0PPQi5UFmqcLBJw3JiyIR8r1O7KEv9qlVw4VV40=
 github.com/prometheus/prometheus v1.8.2-0.20200622142935-153f859b7499 h1:q+yGm39CmSV1S7oxCz36nlvx9ugRoEodwuHusgJw+iU=
 github.com/prometheus/prometheus v1.8.2-0.20200622142935-153f859b7499/go.mod h1:QV6T0PPQi5UFmqcLBJw3JiyIR8r1O7KEv9qlVw4VV40=
 github.com/rafaeljusto/redigomock v0.0.0-20190202135759-257e089e14a1 h1:+kGqA4dNN5hn7WwvKdzHl0rdN5AEkbNZd0VjRltAiZg=
@@ -1046,8 +1046,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
 github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
 github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
 github.com/thanos-io/thanos v0.8.1-0.20200109203923-552ffa4c1a0d/go.mod h1:usT/TxtJQ7DzinTt+G9kinDQmRS5sxwu0unVKZ9vdcw=
-github.com/thanos-io/thanos v0.12.3-0.20200618165043-6c513e5f5c5f h1:UnMVEOejh6tWKUag5tuC0WjKfKmGwJ2+ky0MV4KM52I=
-github.com/thanos-io/thanos v0.12.3-0.20200618165043-6c513e5f5c5f/go.mod h1:CPqrM/ibNtlraee0to4dSRiTs+KLI1c3agMS2lmJpz0=
+github.com/thanos-io/thanos v0.13.1-0.20200625180332-f078faed1b96 h1:McsluZ8fXVwGbdXsZ20uZNGukmPycDU9m6df64S2bqQ=
+github.com/thanos-io/thanos v0.13.1-0.20200625180332-f078faed1b96/go.mod h1:VuNcGvUE0u57S1XXqYhf0dQzUO3wUnw2B5IKsju+1z4=
 github.com/tidwall/pretty v0.0.0-20180105212114-65a9db5fad51/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
 github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
 github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=

diff --git a/pkg/ingester/ingester_v2.go b/pkg/ingester/ingester_v2.go
@@ -898,7 +898,10 @@ func (i *Ingester) createTSDB(userID string) (*userTSDB, error) {
 			tsdbPromReg,
 			udir,
 			cortex_tsdb.NewUserBucketClient(userID, i.TSDBState.bucket),
-			func() labels.Labels { return l }, metadata.ReceiveSource)
+			func() labels.Labels { return l },
+			metadata.ReceiveSource,
+			true, // Allow out of order uploads. It's fine in Cortex's context.
+		)
 	}
 
 	i.TSDBState.tsdbMetrics.setRegistryForUser(userID, tsdbPromReg)

diff --git a/pkg/storage/tsdb/config.go b/pkg/storage/tsdb/config.go
@@ -207,7 +207,7 @@ func (cfg *BucketStoreConfig) RegisterFlags(f *flag.FlagSet) {
 	f.DurationVar(&cfg.SyncInterval, "experimental.tsdb.bucket-store.sync-interval", 5*time.Minute, "How frequently scan the bucket to look for changes (new blocks shipped by ingesters and blocks removed by retention or compaction). 0 disables it.")
 	f.Uint64Var(&cfg.MaxChunkPoolBytes, "experimental.tsdb.bucket-store.max-chunk-pool-bytes", uint64(2*units.Gibibyte), "Max size - in bytes - of a per-tenant chunk pool, used to reduce memory allocations.")
 	f.Uint64Var(&cfg.MaxSampleCount, "experimental.tsdb.bucket-store.max-sample-count", 0, "Max number of samples per query when loading series from the long-term storage. 0 disables the limit.")
-	f.IntVar(&cfg.MaxConcurrent, "experimental.tsdb.bucket-store.max-concurrent", 20, "Max number of concurrent queries to execute against the long-term storage on a per-tenant basis.")
+	f.IntVar(&cfg.MaxConcurrent, "experimental.tsdb.bucket-store.max-concurrent", 100, "Max number of concurrent queries to execute against the long-term storage. The limit is shared across all tenants.")
 	f.IntVar(&cfg.TenantSyncConcurrency, "experimental.tsdb.bucket-store.tenant-sync-concurrency", 10, "Maximum number of concurrent tenants synching blocks.")
 	f.IntVar(&cfg.BlockSyncConcurrency, "experimental.tsdb.bucket-store.block-sync-concurrency", 20, "Maximum number of concurrent blocks synching per tenant.")
 	f.IntVar(&cfg.MetaSyncConcurrency, "experimental.tsdb.bucket-store.meta-sync-concurrency", 20, "Number of Go routines to use when syncing block meta files from object storage per tenant.")

diff --git a/pkg/storegateway/bucket_store_metrics_test.go b/pkg/storegateway/bucket_store_metrics_test.go
@@ -236,7 +236,6 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
 	m.chunkSizeBytes.Observe(30 * base)
 
 	m.queriesDropped.Add(31 * base)
-	m.queriesLimit.Add(32 * base)
 
 	m.seriesRefetches.Add(33 * base)
 
@@ -273,7 +272,6 @@ type mockedBucketStoreMetrics struct {
 	resultSeriesCount     prometheus.Summary
 	chunkSizeBytes        prometheus.Histogram
 	queriesDropped        prometheus.Counter
-	queriesLimit          prometheus.Gauge
 
 	cachedPostingsCompressions           *prometheus.CounterVec
 	cachedPostingsCompressionErrors      *prometheus.CounterVec
@@ -355,10 +353,6 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
 		Name: "thanos_bucket_store_queries_dropped_total",
 		Help: "Number of queries that were dropped due to the sample limit.",
 	})
-	m.queriesLimit = promauto.With(reg).NewGauge(prometheus.GaugeOpts{
-		Name: "thanos_bucket_store_queries_concurrent_max",
-		Help: "Number of maximum concurrent queries.",
-	})
 	m.seriesRefetches = promauto.With(reg).NewCounter(prometheus.CounterOpts{
 		Name: "thanos_bucket_store_series_refetches_total",
 		Help: fmt.Sprintf("Total number of cases where %v bytes was not enough was to fetch series from index, resulting in refetch.", 64*1024),

diff --git a/pkg/storegateway/bucket_stores.go b/pkg/storegateway/bucket_stores.go
@@ -18,6 +18,7 @@ import (
 	"github.com/thanos-io/thanos/pkg/block"
 	thanos_metadata "github.com/thanos-io/thanos/pkg/block/metadata"
 	"github.com/thanos-io/thanos/pkg/extprom"
+	"github.com/thanos-io/thanos/pkg/gate"
 	"github.com/thanos-io/thanos/pkg/objstore"
 	"github.com/thanos-io/thanos/pkg/store"
 	storecache "github.com/thanos-io/thanos/pkg/store/cache"
@@ -43,6 +44,9 @@ type BucketStores struct {
 	// Index cache shared across all tenants.
 	indexCache storecache.IndexCache
 
+	// Gate used to limit query concurrency across all tenants.
+	queryGate gate.Gate
+
 	// Keeps a bucket store for each tenant.
 	storesMu sync.RWMutex
 	stores   map[string]*store.BucketStore
@@ -59,6 +63,14 @@ func NewBucketStores(cfg tsdb.Config, filters []block.MetadataFilter, bucketClie
 		return nil, errors.Wrapf(err, "create caching bucket")
 	}
 
+	// The number of concurrent queries against the tenants BucketStores are limited.
+	queryGateReg := extprom.WrapRegistererWithPrefix("cortex_bucket_stores_", reg)
+	queryGate := gate.NewKeeper(queryGateReg).NewGate(cfg.BucketStore.MaxConcurrent)
+	promauto.With(reg).NewGauge(prometheus.GaugeOpts{
+		Name: "cortex_bucket_stores_gate_queries_concurrent_max",
+		Help: "Number of maximum concurrent queries allowed.",
+	}).Set(float64(cfg.BucketStore.MaxConcurrent))
+
 	u := &BucketStores{
 		logger:             logger,
 		cfg:                cfg,
@@ -68,6 +80,7 @@ func NewBucketStores(cfg tsdb.Config, filters []block.MetadataFilter, bucketClie
 		logLevel:           logLevel,
 		bucketStoreMetrics: NewBucketStoreMetrics(),
 		metaFetcherMetrics: NewMetadataFetcherMetrics(),
+		queryGate:          queryGate,
 		syncTimes: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
 			Name:    "cortex_bucket_stores_blocks_sync_seconds",
 			Help:    "The total time it takes to perform a sync stores",
@@ -268,9 +281,9 @@ func (u *BucketStores) getOrCreateStore(userID string) (*store.BucketStore, erro
 		fetcher,
 		filepath.Join(u.cfg.BucketStore.SyncDir, userID),
 		u.indexCache,
-		uint64(u.cfg.BucketStore.MaxChunkPoolBytes),
+		u.queryGate,
+		u.cfg.BucketStore.MaxChunkPoolBytes,
 		u.cfg.BucketStore.MaxSampleCount,
-		u.cfg.BucketStore.MaxConcurrent,
 		u.logLevel.String() == "debug", // Turn on debug logging, if the log level is set to debug
 		u.cfg.BucketStore.BlockSyncConcurrency,
 		nil,   // Do not limit timerange.

diff --git a/pkg/storegateway/bucket_stores_test.go b/pkg/storegateway/bucket_stores_test.go
@@ -88,7 +88,21 @@ func TestBucketStores_InitialSync(t *testing.T) {
 			# HELP cortex_bucket_store_block_load_failures_total Total number of failed remote block loading attempts.
 			# TYPE cortex_bucket_store_block_load_failures_total counter
 			cortex_bucket_store_block_load_failures_total 0
-	`), "cortex_bucket_store_blocks_loaded", "cortex_bucket_store_block_loads_total", "cortex_bucket_store_block_load_failures_total"))
+
+			# HELP cortex_bucket_stores_gate_queries_concurrent_max Number of maximum concurrent queries allowed.
+			# TYPE cortex_bucket_stores_gate_queries_concurrent_max gauge
+			cortex_bucket_stores_gate_queries_concurrent_max 100
+
+			# HELP cortex_bucket_stores_gate_queries_in_flight Number of queries that are currently in flight.
+			# TYPE cortex_bucket_stores_gate_queries_in_flight gauge
+			cortex_bucket_stores_gate_queries_in_flight 0
+	`),
+		"cortex_bucket_store_blocks_loaded",
+		"cortex_bucket_store_block_loads_total",
+		"cortex_bucket_store_block_load_failures_total",
+		"cortex_bucket_stores_gate_queries_concurrent_max",
+		"cortex_bucket_stores_gate_queries_in_flight",
+	))
 
 	assert.Greater(t, testutil.ToFloat64(stores.syncLastSuccess), float64(0))
 }
@@ -145,7 +159,21 @@ func TestBucketStores_SyncBlocks(t *testing.T) {
 			# HELP cortex_bucket_store_block_load_failures_total Total number of failed remote block loading attempts.
 			# TYPE cortex_bucket_store_block_load_failures_total counter
 			cortex_bucket_store_block_load_failures_total 0
-	`), "cortex_bucket_store_blocks_loaded", "cortex_bucket_store_block_loads_total", "cortex_bucket_store_block_load_failures_total"))
+
+			# HELP cortex_bucket_stores_gate_queries_concurrent_max Number of maximum concurrent queries allowed.
+			# TYPE cortex_bucket_stores_gate_queries_concurrent_max gauge
+			cortex_bucket_stores_gate_queries_concurrent_max 100
+
+			# HELP cortex_bucket_stores_gate_queries_in_flight Number of queries that are currently in flight.
+			# TYPE cortex_bucket_stores_gate_queries_in_flight gauge
+			cortex_bucket_stores_gate_queries_in_flight 0
+	`),
+		"cortex_bucket_store_blocks_loaded",
+		"cortex_bucket_store_block_loads_total",
+		"cortex_bucket_store_block_load_failures_total",
+		"cortex_bucket_stores_gate_queries_concurrent_max",
+		"cortex_bucket_stores_gate_queries_in_flight",
+	))
 
 	assert.Greater(t, testutil.ToFloat64(stores.syncLastSuccess), float64(0))
 }