Skip to content

Commit eafae6b

Browse files
authored
Check the ready status of component before it becomes reachable. (#761)
This fixes a situation where the component takes a while to initialize: previously, we were declaring the component as activated/healthy even during the pending init, resulting in subsequent method calls failing. Thanks to rgrandl@ for debugging the issue. Other changes: * Don't add a component to the routing info until it has been initialized. This fixes the health-checking situation where one component replica becomes healthy, but we subsequently send method calls to other replicas which fail. * Fix go vet warnings in a couple of places.
1 parent 3d36c90 commit eafae6b

File tree

13 files changed

+399
-377
lines changed

13 files changed

+399
-377
lines changed

examples/bankofanthos/common/ledger_reader.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ func (reader *LedgerReader) pollTransactions(startingID int64) int64 {
104104
latestID := startingID
105105
transactionList, err := reader.dbRepo.FindLatest(startingID)
106106
if err != nil {
107-
reader.logger.Error("Error polling transactions", err)
107+
reader.logger.Error("Error polling transactions", "err", err)
108108
return latestID
109109
}
110110
reader.logger.Info("Polling new transactions")

examples/bankofanthos/frontend/handlers.go

+11-11
Original file line numberDiff line numberDiff line change
@@ -124,15 +124,15 @@ func (s *server) homeHandler(w http.ResponseWriter, r *http.Request) {
124124

125125
balance, err := s.balanceReader.Get().GetBalance(r.Context(), accountID)
126126
if err != nil {
127-
logger.Error("Couldn't fetch balance", err)
127+
logger.Error("Couldn't fetch balance", "err", err)
128128
}
129129
txnHistory, err := s.transactionHistory.Get().GetTransactions(r.Context(), accountID)
130130
if err != nil {
131-
logger.Error("Couldn't fetch transaction history", err)
131+
logger.Error("Couldn't fetch transaction history", "err", err)
132132
}
133133
contacts, err := s.contacts.Get().GetContacts(r.Context(), username)
134134
if err != nil {
135-
logger.Error("Couldn't fetch contacts", err)
135+
logger.Error("Couldn't fetch contacts", "err", err)
136136
}
137137
labeledHistory := populateContactLabels(accountID, txnHistory, contacts)
138138

@@ -149,7 +149,7 @@ func (s *server) homeHandler(w http.ResponseWriter, r *http.Request) {
149149
"Message": r.URL.Query().Get("msg"),
150150
"BankName": s.config.bankName,
151151
}); err != nil {
152-
logger.Error("couldn't generate home page", err)
152+
logger.Error("couldn't generate home page", "err", err)
153153
}
154154
}
155155

@@ -194,7 +194,7 @@ func (s *server) paymentHandler(w http.ResponseWriter, r *http.Request) {
194194
token, err := r.Cookie(tokenCookieName)
195195
if err != nil || !verifyToken(token.Value, s.config.publicKey) {
196196
msg := "Error submitting payment: user is not authenticated"
197-
logger.Error(msg, err)
197+
logger.Error(msg, "err", err)
198198
http.Error(w, msg, http.StatusUnauthorized)
199199
return
200200
}
@@ -257,7 +257,7 @@ func (s *server) depositHandler(w http.ResponseWriter, r *http.Request) {
257257
token, err := r.Cookie(tokenCookieName)
258258
if err != nil || !verifyToken(token.Value, s.config.publicKey) {
259259
msg := "Error submitting deposit: user is not authenticated"
260-
logger.Error(msg, err)
260+
logger.Error(msg, "err", err)
261261
http.Error(w, msg, http.StatusUnauthorized)
262262
return
263263
}
@@ -415,7 +415,7 @@ func (s *server) loginGetHandler(w http.ResponseWriter, r *http.Request) {
415415
"RedirectURI": redirectURI,
416416
"AppName": appName,
417417
}); err != nil {
418-
logger.Error("couldn't generate login page", err)
418+
logger.Error("couldn't generate login page", "err", err)
419419
}
420420
}
421421

@@ -426,7 +426,7 @@ func (s *server) loginPostHandler(w http.ResponseWriter, r *http.Request) {
426426
logger := s.Logger(r.Context())
427427
err := s.loginPostHelper(w, r)
428428
if err != nil {
429-
logger.Error("/login POST failed", err, "user", r.FormValue("username"))
429+
logger.Error("/login POST failed", "err", err, "user", r.FormValue("username"))
430430
http.Redirect(w, r, "/login?msg=Login+Failed", http.StatusFound)
431431
}
432432
}
@@ -565,7 +565,7 @@ func (s *server) consentGetHandler(w http.ResponseWriter, r *http.Request) {
565565
"RedirectURI": redirectURI,
566566
"AppName": appName,
567567
}); err != nil {
568-
logger.Error("couldn't generate consent.html", err)
568+
logger.Error("couldn't generate consent.html", "err", err)
569569
}
570570
return
571571
}
@@ -612,7 +612,7 @@ func (s *server) signupGetHandler(w http.ResponseWriter, r *http.Request) {
612612
"PodZone": s.config.podZone,
613613
"BankName": s.config.bankName,
614614
}); err != nil {
615-
logger.Error("couldn't generate consent.html", err)
615+
logger.Error("couldn't generate consent.html", "err", err)
616616
}
617617
}
618618

@@ -637,7 +637,7 @@ func (s *server) signupPostHandler(w http.ResponseWriter, r *http.Request) {
637637
}
638638
err := s.userService.Get().CreateUser(r.Context(), creq)
639639
if err != nil {
640-
logger.Debug("Error creating new user", "err", err)
640+
logger.Error("Error creating new user", "err", err)
641641
url := fmt.Sprintf("/login?msg=Account creation failed: %s", err)
642642
http.Redirect(w, r, url, http.StatusSeeOther)
643643
return

examples/bankofanthos/transactionhistory/transactionhistory.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ func (i *impl) processTransactionForAcct(accountID string, transaction model.Tra
6969
i.Logger(ctx).Debug("Processing transaction", "accountID", accountID, "transaction", transaction)
7070
got, err := i.txnCache.c.Get(accountID)
7171
if err != nil {
72-
i.Logger(ctx).Error("processTransactionForAcct failed", err)
72+
i.Logger(ctx).Error("processTransactionForAcct failed", "err", err)
7373
return
7474
}
7575
txns := got.([]model.Transaction)

godeps.txt

+1
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,7 @@ github.com/ServiceWeaver/weaver/internal/weaver
693693
sort
694694
strings
695695
sync
696+
sync/atomic
696697
syscall
697698
time
698699
github.com/ServiceWeaver/weaver/metrics

internal/net/call/handlers.go

+2-9
Original file line numberDiff line numberDiff line change
@@ -48,19 +48,12 @@ type HandlerMap struct {
4848
}
4949

5050
// NewHandlerMap returns a handler map to which the server handlers can
51-
// be added. A "ready" handler is automatically registered in the new
52-
// returned map.
51+
// be added.
5352
func NewHandlerMap() *HandlerMap {
54-
hm := &HandlerMap{
53+
return &HandlerMap{
5554
handlers: map[MethodKey]Handler{},
5655
names: map[MethodKey]string{},
5756
}
58-
// Add a dummy "ready" handler. Clients will repeatedly call this
59-
// RPC until it responds successfully, ensuring the server is ready.
60-
hm.Set("", "ready", func(context.Context, []byte) ([]byte, error) {
61-
return nil, nil
62-
})
63-
return hm
6457
}
6558

6659
// Set registers a handler for the specified method of component.

internal/proxy/proxy.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ func (p *Proxy) director(r *http.Request) {
5858
p.mu.Lock()
5959
defer p.mu.Unlock()
6060
if len(p.backends) == 0 {
61-
p.logger.Error("director", errors.New("no backends"), "url", r.URL)
61+
p.logger.Error("director", "err", errors.New("no backends"), "url", r.URL)
6262
return
6363
}
6464
r.URL.Scheme = "http" // TODO(mwhittaker): Support HTTPS.

internal/weaver/remoteweavelet.go

+30-14
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"reflect"
2727
"strings"
2828
"sync"
29+
"sync/atomic"
2930

3031
"github.com/ServiceWeaver/weaver/internal/config"
3132
"github.com/ServiceWeaver/weaver/internal/control"
@@ -45,8 +46,9 @@ import (
4546
"golang.org/x/sync/errgroup"
4647
)
4748

48-
// readyMethodKey holds the key for a method used to check if a backend is ready.
49-
var readyMethodKey = call.MakeMethodKey("", "ready")
49+
// readyMethodName holds the name of the special component method used by the
50+
// clients to check if a component is ready.
51+
const readyMethodName = "ready"
5052

5153
// RemoteWeaveletOptions configure a RemoteWeavelet.
5254
type RemoteWeaveletOptions struct {
@@ -113,16 +115,10 @@ type component struct {
113115

114116
implInit sync.Once // used to initialize impl, serverStub
115117
implErr error // non-nil if impl creation fails
118+
implReady atomic.Bool // true only after impl creation succeeds
116119
impl any // instance of component implementation
117120
serverStub codegen.Server // handles remote calls from other processes
118121

119-
// TODO(mwhittaker): We have one client for every component. Every client
120-
// independently maintains network connections to every weavelet hosting
121-
// the component. Thus, there may be many redundant network connections to
122-
// the same weavelet. Given n weavelets hosting m components, there's at
123-
// worst n^2m connections rather than a more optimal n^2 (a single
124-
// connection between every pair of weavelets). We should rewrite things to
125-
// avoid the redundancy.
126122
resolver *routingResolver // client resolver
127123
balancer *routingBalancer // client balancer
128124

@@ -419,6 +415,7 @@ func (w *RemoteWeavelet) GetImpl(t reflect.Type) (any, error) {
419415
return
420416
} else {
421417
w.syslogger.Debug("Constructed", "component", name)
418+
c.implReady.Store(true)
422419
}
423420

424421
logger := w.logger(c.reg.Name)
@@ -513,7 +510,7 @@ func (w *RemoteWeavelet) makeStub(fullName string, reg *codegen.Registration, re
513510
return nil, err
514511
}
515512
if wait {
516-
if err := waitUntilReady(w.ctx, conn); err != nil {
513+
if err := waitUntilReady(w.ctx, conn, fullName); err != nil {
517514
w.syslogger.Error("Failed to wait for remote", "component", name, "err", err)
518515
return nil, err
519516
}
@@ -647,7 +644,16 @@ func (w *RemoteWeavelet) UpdateRoutingInfo(ctx context.Context, req *protos.Upda
647644

648645
// GetHealth implements controller.GetHealth.
649646
func (w *RemoteWeavelet) GetHealth(ctx context.Context, req *protos.GetHealthRequest) (*protos.GetHealthReply, error) {
650-
return &protos.GetHealthReply{Status: protos.HealthStatus_HEALTHY}, nil
647+
// Get the health status for all components. For now, we consider a component
648+
// healthy iff it has been successfully initialized. In the future, we will
649+
// maintain a real-time health for each component.
650+
reply := &protos.GetHealthReply{Status: protos.HealthStatus_HEALTHY}
651+
for cname, c := range w.componentsByName {
652+
if c.implReady.Load() {
653+
reply.HealthyComponents = append(reply.HealthyComponents, cname)
654+
}
655+
}
656+
return reply, nil
651657
}
652658

653659
// GetMetrics implements controller.GetMetrics.
@@ -718,6 +724,16 @@ func (w *RemoteWeavelet) addHandlers(handlers *call.HandlerMap, c *component) {
718724
}
719725
handlers.Set(c.reg.Name, mname, handler)
720726
}
727+
728+
// Add the special "component is ready" method handler, which is used by
729+
// the clients to wait for the component to be ready before receiving traffic
730+
// (see waitUntilReady).
731+
handlers.Set(c.reg.Name, readyMethodName, func(context.Context, []byte) ([]byte, error) {
732+
if c.implReady.Load() {
733+
return nil, nil
734+
}
735+
return nil, call.Unreachable
736+
})
721737
}
722738

723739
// repeatedly repeatedly executes f until it succeeds or until ctx is cancelled.
@@ -924,10 +940,10 @@ func (s *server) handlers(components []string) (*call.HandlerMap, error) {
924940
}
925941

926942
// waitUntilReady blocks until a successful call to the "ready" method is made
927-
// on the provided client.
928-
func waitUntilReady(ctx context.Context, client call.Connection) error {
943+
// on the provided component.
944+
func waitUntilReady(ctx context.Context, client call.Connection, fullComponentName string) error {
929945
for r := retry.Begin(); r.Continue(ctx); {
930-
_, err := client.Call(ctx, readyMethodKey, nil, call.CallOptions{})
946+
_, err := client.Call(ctx, call.MakeMethodKey(fullComponentName, readyMethodName), nil, call.CallOptions{})
931947
if err == nil || !errors.Is(err, call.Unreachable) {
932948
return err
933949
}

runtime/bin/bin.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func init() {
4545
// the value of version.DeployerVersion. If the string is not a
4646
// constant---if we try to use fmt.Sprintf, for example---it will not be
4747
// embedded in a Service Weaver binary.
48-
versionData = "⟦wEaVeRvErSiOn:deployer=v0.23.0⟧"
48+
versionData = "⟦wEaVeRvErSiOn:deployer=v0.24.0⟧"
4949
}
5050

5151
// rodata returns the read-only data section of the provided binary.

runtime/envelope/envelope.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -309,12 +309,12 @@ func (e *Envelope) WeaveletAddress() string {
309309
}
310310

311311
// GetHealth returns the health status of the weavelet.
312-
func (e *Envelope) GetHealth() protos.HealthStatus {
312+
func (e *Envelope) GetHealth() *protos.GetHealthReply {
313313
reply, err := e.controller.GetHealth(context.TODO(), &protos.GetHealthRequest{})
314314
if err != nil {
315-
return protos.HealthStatus_UNKNOWN
315+
return &protos.GetHealthReply{Status: protos.HealthStatus_UNKNOWN}
316316
}
317-
return reply.Status
317+
return reply
318318
}
319319

320320
// GetProfile gets a profile from the weavelet.

0 commit comments

Comments
 (0)