Commit 60c2aac

Thibault Gilles committed
Share blocking queries between RPC requests
Run only one blocking query per distinct request and share its result with every RPC call waiting on that request. This optimizes the blocking-query path when many agents are watching the same service.
1 parent fd8f448 commit 60c2aac
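The diffs below wire this sharing into the server's RPC layer. For orientation only, here is a minimal, hypothetical sketch of the underlying idea, request coalescing keyed by a request identifier, in the spirit of Go's singleflight package; the names used here (coalescer, sharedCall, Do) are illustrative and do not appear in the commit:

package main

import (
    "fmt"
    "sync"
)

// sharedCall is one in-flight computation whose result every waiter shares.
type sharedCall struct {
    done chan struct{} // closed once val/err are populated
    val  interface{}
    err  error
}

type coalescer struct {
    mu    sync.Mutex
    calls map[string]*sharedCall
}

// Do runs fn at most once per key at a time; concurrent callers with the same
// key block on the same sharedCall and all receive the same result.
func (c *coalescer) Do(key string, fn func() (interface{}, error)) (interface{}, error) {
    c.mu.Lock()
    if call, ok := c.calls[key]; ok {
        c.mu.Unlock()
        <-call.done // someone else is already running fn for this key
        return call.val, call.err
    }
    call := &sharedCall{done: make(chan struct{})}
    c.calls[key] = call
    c.mu.Unlock()

    call.val, call.err = fn() // the single shared execution

    c.mu.Lock()
    delete(c.calls, key) // later requests trigger a fresh run
    c.mu.Unlock()
    close(call.done) // wake every waiter

    return call.val, call.err
}

func main() {
    c := &coalescer{calls: make(map[string]*sharedCall)}
    var wg sync.WaitGroup
    for i := 0; i < 3; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            // All three goroutines share one execution of the stand-in query.
            v, _ := c.Do("health/web", func() (interface{}, error) {
                return "check-service-nodes result", nil
            })
            fmt.Println(v)
        }()
    }
    wg.Wait()
}

In the commit itself this role is played by sharedBlockingQuery and runSharedBlockingQuery, the key is the request's CacheInfo().Key, and waiters additionally honor MinQueryIndex, MaxQueryTime, and a Cancel channel that is closed when the last watcher times out.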

File tree

agent/consul/health_endpoint.go
agent/consul/rpc.go
agent/consul/server.go
agent/consul/state/catalog.go
agent/structs/structs.go

5 files changed: +240 −35


agent/consul/health_endpoint.go  (+55 −34)

@@ -2,6 +2,7 @@ package consul
 
 import (
     "fmt"
+    "sort"
 
     "github.com/armon/go-metrics"
     "github.com/hashicorp/consul/agent/consul/state"
@@ -137,52 +138,72 @@ func (h *Health) ServiceNodes(args *structs.ServiceSpecificRequest, reply *struc
         }
     }
 
-    err := h.srv.blockingQuery(
+    err := h.srv.sharedBlockingQuery(
+        args,
+        reply,
         &args.QueryOptions,
         &reply.QueryMeta,
-        func(ws memdb.WatchSet, state *state.Store) error {
+        func(ws memdb.WatchSet, state *state.Store) (uint64, func(uint64, interface{}) error, error) {
             index, nodes, err := f(ws, state, args)
             if err != nil {
-                return err
+                return 0, nil, err
             }
 
-            reply.Index, reply.Nodes = index, nodes
-            if len(args.NodeMetaFilters) > 0 {
-                reply.Nodes = nodeMetaFilter(args.NodeMetaFilters, reply.Nodes)
-            }
-            if err := h.srv.filterACL(args.Token, reply); err != nil {
-                return err
-            }
-            return h.srv.sortNodesByDistanceFrom(args.Source, reply.Nodes)
+            return index, func(index uint64, rawReply interface{}) error {
+                reply := rawReply.(*structs.IndexedCheckServiceNodes)
+                // copy the slice to ensure requests' filtering / sorting don't affect each other
+                replyNodes := make(structs.CheckServiceNodes, len(nodes))
+                copy(replyNodes, nodes)
+                reply.Index, reply.Nodes = index, replyNodes
+                return nil
+            }, nil
         })
+    if err != nil {
+        return err
+    }
+
+    h.srv.setQueryMeta(&reply.QueryMeta)
+
+    if len(args.NodeMetaFilters) > 0 {
+        reply.Nodes = nodeMetaFilter(args.NodeMetaFilters, reply.Nodes)
+    }
+    if err := h.srv.filterACL(args.Token, reply); err != nil {
+        return err
+    }
+    err = h.srv.sortNodesByDistanceFrom(args.Source, reply.Nodes)
+    if err != nil {
+        return err
+    }
 
     // Provide some metrics
-    if err == nil {
-        // For metrics, we separate Connect-based lookups from non-Connect
-        key := "service"
-        if args.Connect {
-            key = "connect"
-        }
+    // For metrics, we separate Connect-based lookups from non-Connect
+    key := "service"
+    if args.Connect {
+        key = "connect"
+    }
 
-        metrics.IncrCounterWithLabels([]string{"health", key, "query"}, 1,
-            []metrics.Label{{Name: "service", Value: args.ServiceName}})
-        if args.ServiceTag != "" {
-            metrics.IncrCounterWithLabels([]string{"health", key, "query-tag"}, 1,
-                []metrics.Label{{Name: "service", Value: args.ServiceName}, {Name: "tag", Value: args.ServiceTag}})
-        }
-        if len(args.ServiceTags) > 0 {
-            labels := []metrics.Label{{Name: "service", Value: args.ServiceName}}
-            for _, tag := range args.ServiceTags {
-                labels = append(labels, metrics.Label{Name: "tag", Value: tag})
-            }
-            metrics.IncrCounterWithLabels([]string{"health", key, "query-tags"}, 1, labels)
-        }
-        if len(reply.Nodes) == 0 {
-            metrics.IncrCounterWithLabels([]string{"health", key, "not-found"}, 1,
-                []metrics.Label{{Name: "service", Value: args.ServiceName}})
+    metrics.IncrCounterWithLabels([]string{"health", key, "query"}, 1,
+        []metrics.Label{{Name: "service", Value: args.ServiceName}})
+    if args.ServiceTag != "" {
+        metrics.IncrCounterWithLabels([]string{"health", key, "query-tag"}, 1,
+            []metrics.Label{{Name: "service", Value: args.ServiceName}, {Name: "tag", Value: args.ServiceTag}})
+    }
+    if len(args.ServiceTags) > 0 {
+        // Sort tags so that the metric is the same even if the request
+        // tags are in a different order
+        sort.Strings(args.ServiceTags)
+
+        labels := []metrics.Label{{Name: "service", Value: args.ServiceName}}
+        for _, tag := range args.ServiceTags {
+            labels = append(labels, metrics.Label{Name: "tag", Value: tag})
         }
+        metrics.IncrCounterWithLabels([]string{"health", key, "query-tags"}, 1, labels)
+    }
+    if len(reply.Nodes) == 0 {
+        metrics.IncrCounterWithLabels([]string{"health", key, "not-found"}, 1,
+            []metrics.Label{{Name: "service", Value: args.ServiceName}})
     }
-    return err
+    return nil
 }
 
 // The serviceNodes* functions below are the various lookup methods that
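One detail worth noting in the hunk above: since the query result is now shared, the apply callback copies the nodes slice into each reply before per-request ACL filtering and distance sorting run on it. A tiny, hypothetical illustration (slice contents made up) of why the copy matters:

package main

import (
    "fmt"
    "sort"
)

func main() {
    // Result produced once by the shared blocking query.
    shared := []string{"node-c", "node-a", "node-b"}

    // Request 1 sorts a private copy, as the apply callback does.
    mine := make([]string, len(shared))
    copy(mine, shared)
    sort.Strings(mine)

    fmt.Println(mine)   // [node-a node-b node-c]
    fmt.Println(shared) // unchanged: [node-c node-a node-b]
    // Sorting `shared` directly would have mutated the slice every other
    // request is about to read.
}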

agent/consul/rpc.go  (+153)

@@ -6,9 +6,11 @@ import (
     "io"
     "net"
     "strings"
+    "sync/atomic"
     "time"
 
     "github.com/armon/go-metrics"
+    "github.com/hashicorp/consul/agent/cache"
     "github.com/hashicorp/consul/agent/consul/state"
     "github.com/hashicorp/consul/agent/metadata"
     "github.com/hashicorp/consul/agent/pool"
@@ -443,6 +445,157 @@ RUN_QUERY:
     return err
 }
 
+type sharedQueryFn func(memdb.WatchSet, *state.Store) (uint64, func(uint64, interface{}) error, error)
+
+func (s *Server) sharedBlockingQuery(req cache.Request, res interface{}, queryOpts *structs.QueryOptions, queryMeta *structs.QueryMeta, fn sharedQueryFn) error {
+    var timeout *time.Timer
+
+    if queryOpts.RequireConsistent {
+        if err := s.consistentRead(); err != nil {
+            return err
+        }
+    }
+
+    // Restrict the max query time, and ensure there is always one.
+    if queryOpts.MaxQueryTime > maxQueryTime {
+        queryOpts.MaxQueryTime = maxQueryTime
+    } else if queryOpts.MaxQueryTime <= 0 {
+        queryOpts.MaxQueryTime = defaultQueryTime
+    }
+
+    // Apply a small amount of jitter to the request.
+    queryOpts.MaxQueryTime += lib.RandomStagger(queryOpts.MaxQueryTime / jitterFraction)
+
+    // Setup a query timeout.
+    timeout = time.NewTimer(queryOpts.MaxQueryTime)
+    defer timeout.Stop()
+
+    cacheInfo := req.CacheInfo()
+
+    // Check if a blocking query is already running for this request
+    s.blockingQueriesLock.RLock()
+    queryState, alreadyInserted := s.blockingQueries[cacheInfo.Key]
+    s.blockingQueriesLock.RUnlock()
+
+    // If not, run one
+    if !alreadyInserted {
+        ws := memdb.NewWatchSet()
+        // run the func a first time to get the index
+        firstRunIndex, apply, err := fn(ws, s.fsm.State())
+
+        s.blockingQueriesLock.Lock()
+        queryState, alreadyInserted = s.blockingQueries[cacheInfo.Key]
+        if alreadyInserted {
+            // Another query raced with us and already ran a blocking query
+            s.blockingQueriesLock.Unlock()
+        } else {
+            // Add query to map
+            queryState = newBlockingQueryState(firstRunIndex, apply, err)
+            s.blockingQueries[cacheInfo.Key] = queryState
+            s.blockingQueriesLock.Unlock()
+
+            // Run the shared blocking query
+            go s.runSharedBlockingQuery(firstRunIndex, cacheInfo.Key, ws, queryMeta, queryState, fn)
+        }
+    }
+
+    stateIndex := atomic.LoadUint64(&queryState.Index)
+
+    if stateIndex <= queryOpts.MinQueryIndex {
+        // Increment the shared query watcher
+        atomic.AddInt32(&queryState.Watchers, 1)
+
+        // block on either timeout or shared query
+        select {
+        case <-timeout.C:
+            if n := atomic.AddInt32(&queryState.Watchers, -1); n == 0 {
+                s.logger.Println("[TRACE] consul: cancelling shared blocking query because there is no more watchers")
+
+                // we were the last request to wait on the shared blocking query and we reached MaxQueryTime, cancel the blocking query
+                close(queryState.Cancel)
+            }
+        case <-queryState.Done:
+        }
+    }
+
+    if err := queryState.Err.Load(); err != nil {
+        return err.(error)
+    }
+
+    return queryState.Apply.Load().(func(uint64, interface{}) error)(atomic.LoadUint64(&queryState.Index), res)
+}
+
+func (s *Server) runSharedBlockingQuery(index uint64, cacheKey string, ws memdb.WatchSet, queryMeta *structs.QueryMeta, queryState *blockingQueryState, fn sharedQueryFn) {
+    s.logger.Println("[TRACE] consul: running shared blocking query")
+
+    // Wait initial query watchset
+    expired := ws.Watch(queryState.Cancel)
+
+RUN_QUERY:
+
+    // If the read must be consistent we verify that we are still the leader.
+
+    // Run the query.
+    metrics.IncrCounter([]string{"rpc", "query"}, 1)
+
+    // Operate on a consistent set of state. This makes sure that the
+    // abandon channel goes with the state that the caller is using to
+    // build watches.
+    state := s.fsm.State()
+
+    // We can skip all watch tracking if this isn't a blocking query.
+    if index > 0 {
+        ws = memdb.NewWatchSet()
+
+        // This channel will be closed if a snapshot is restored and the
+        // whole state store is abandoned.
+        ws.Add(state.AbandonCh())
+    }
+
+    // Block up to the timeout if we didn't see anything fresh.
+    idx, apply, err := fn(ws, state)
+    // Note we check queryOpts.MinQueryIndex is greater than zero to determine if
+    // blocking was requested by client, NOT meta.Index since the state function
+    // might return zero if something is not initialised and care wasn't taken to
+    // handle that special case (in practice this happened a lot so fixing it
+    // systematically here beats trying to remember to add zero checks in every
+    // state method). We also need to ensure that unless there is an error, we
+    // return an index > 0 otherwise the client will never block and burn CPU and
+    // requests.
+    if err == nil && idx < 1 {
+        idx = 1
+    }
+    if !expired && err == nil && index > 0 && idx <= index {
+        if expired := ws.Watch(queryState.Cancel); !expired {
+            // If a restore may have woken us up then bail out from
+            // the query immediately. This is slightly race-ey since
+            // this might have been interrupted for other reasons,
+            // but it's OK to kick it back to the caller in either
+            // case.
+            select {
+            case <-state.AbandonCh():
+            default:
+                goto RUN_QUERY
+            }
+        }
+    }
+
+    // store results
+    s.blockingQueriesLock.Lock()
+    if err != nil {
+        queryState.Err.Store(err)
+    }
+    if apply != nil {
+        queryState.Apply.Store(apply)
+    }
+    atomic.StoreUint64(&queryState.Index, idx)
+    delete(s.blockingQueries, cacheKey)
+    s.blockingQueriesLock.Unlock()
+
+    // notify changed
+    close(queryState.Done)
+}
+
 // setQueryMeta is used to populate the QueryMeta data for an RPC call
 func (s *Server) setQueryMeta(m *structs.QueryMeta) {
     if s.IsLeader() {

agent/consul/server.go  (+28)

@@ -88,6 +88,30 @@ var (
     ErrWANFederationDisabled = fmt.Errorf("WAN Federation is disabled")
 )
 
+type blockingQueryState struct {
+    Index    uint64
+    Cancel   chan time.Time
+    Watchers int32
+    Done     chan struct{}
+    Apply    atomic.Value
+    Err      atomic.Value
+}
+
+func newBlockingQueryState(index uint64, apply func(uint64, interface{}) error, err error) *blockingQueryState {
+    queryState := &blockingQueryState{
+        Done:   make(chan struct{}),
+        Cancel: make(chan time.Time),
+        Index:  index,
+    }
+    if apply != nil {
+        queryState.Apply.Store(apply)
+    }
+    if err != nil {
+        queryState.Err.Store(err)
+    }
+    return queryState
+}
+
 // Server is Consul server which manages the service discovery,
 // health checking, DC forwarding, Raft, and multiple Serf pools.
 type Server struct {
@@ -235,6 +259,9 @@ type Server struct {
     shutdownCh   chan struct{}
     shutdownLock sync.Mutex
 
+    blockingQueriesLock sync.RWMutex
+    blockingQueries     map[string]*blockingQueryState
+
     // embedded struct to hold all the enterprise specific data
     EnterpriseServer
 }
@@ -323,6 +350,7 @@ func NewServerLogger(config *Config, logger *log.Logger, tokens *token.Store) (*
     sessionTimers:   NewSessionTimers(),
     tombstoneGC:     gc,
     serverLookup:    NewServerLookup(),
+    blockingQueries: make(map[string]*blockingQueryState),
     shutdownCh:      shutdownCh,
 }

agent/consul/state/catalog.go  (+1 −1)

@@ -1697,7 +1697,7 @@ func (s *Store) CheckServiceTagNodes(ws memdb.WatchSet, serviceName string, tags
     for service := iter.Next(); service != nil; service = iter.Next() {
         svc := service.(*structs.ServiceNode)
         if !serviceTagsFilter(svc, tags) {
-            results = append(results, svc)
+            results = append(results, service.(*structs.ServiceNode))
         }
     }
     return s.parseCheckServiceNodes(tx, ws, idx, serviceName, results, err)

agent/structs/structs.go  (+3)

@@ -6,6 +6,7 @@ import (
     "math/rand"
     "reflect"
     "regexp"
+    "sort"
     "strconv"
     "strings"
     "time"
@@ -379,10 +380,12 @@ func (r *ServiceSpecificRequest) CacheInfo() cache.RequestInfo {
     // cached results, we need to be careful we maintain the same order of fields
     // here. We could alternatively use `hash:set` struct tag on an anonymous
     // struct to make it more robust if it becomes significant.
+    sort.Strings(r.ServiceTags)
     v, err := hashstructure.Hash([]interface{}{
         r.NodeMetaFilters,
         r.ServiceName,
         r.ServiceTag,
+        r.ServiceTags, // make the tests pass until http://github.com/hashicorp/consul/pull/4987 is merged
         r.ServiceAddress,
         r.TagFilter,
         r.Connect,
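Sorting ServiceTags before hashing makes the cache key independent of the order in which a client listed its tags, so such requests coalesce onto the same shared blocking query. A small, hypothetical demonstration of that property using the same hashstructure library (service name and tags made up):

package main

import (
    "fmt"
    "sort"

    "github.com/mitchellh/hashstructure"
)

// cacheKey mimics the normalization in CacheInfo: sort the tags, then hash.
func cacheKey(service string, tags []string) uint64 {
    sort.Strings(tags)
    v, err := hashstructure.Hash([]interface{}{service, tags}, nil)
    if err != nil {
        panic(err)
    }
    return v
}

func main() {
    a := cacheKey("web", []string{"primary", "v2"})
    b := cacheKey("web", []string{"v2", "primary"})
    fmt.Println(a == b) // true: tag order no longer changes the key
}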
