Skip to content

Commit 2d165b5

Browse files
committed
storage
1 parent 84e2b5e commit 2d165b5

File tree

13 files changed

+445
-95
lines changed

13 files changed

+445
-95
lines changed

guardian/cmd/guardian/main.go

+19-1
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@
1515
package main
1616

1717
import (
18+
"context"
1819
"flag"
1920
"os"
21+
"os/signal"
22+
"syscall"
2023

2124
"github.com/sirupsen/logrus"
2225

@@ -44,5 +47,20 @@ func main() {
4447
}
4548

4649
logrus.Infof("Starting Calico Guardian %s", cfg.String())
47-
daemon.Run(cfg.Config, cfg.Targets())
50+
daemon.Run(GetShutdownContext(), cfg.Config, cfg.Targets())
51+
}
52+
53+
// GetShutdownContext creates a context that's done when either syscall.SIGINT or syscall.SIGTERM notified.
54+
func GetShutdownContext() context.Context {
55+
signalChan := make(chan os.Signal, 1)
56+
signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
57+
58+
ctx, cancel := context.WithCancel(context.Background())
59+
go func() {
60+
<-signalChan
61+
logrus.Debug("Shutdown signal received, shutting down.")
62+
cancel()
63+
}()
64+
65+
return ctx
4866
}

guardian/pkg/asyncutil/async.go

+30-29
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ type CommandExecutor[C any, R any] interface {
3737
type ExecutionController interface {
3838
DrainAndBacklog() Signaler
3939
Resume()
40-
ShutdownSignaler() Signaler
40+
WaitForShutdown() <-chan struct{}
4141
}
4242

4343
type executorCoordinator []ExecutionController
@@ -65,35 +65,31 @@ func (coordinator executorCoordinator) Resume() {
6565
}
6666
}
6767

68-
func (coordinator executorCoordinator) ShutdownSignaler() Signaler {
69-
var signalers []Signaler
70-
for _, executor := range coordinator {
71-
signalers = append(signalers, executor.ShutdownSignaler())
72-
}
73-
74-
signal := NewSignaler()
68+
func (coordinator executorCoordinator) WaitForShutdown() <-chan struct{} {
69+
signal := make(chan struct{})
7570
go func() {
76-
defer signal.Send()
77-
for _, signaler := range signalers {
78-
<-signaler.Receive()
71+
defer close(signal)
72+
for _, executor := range coordinator {
73+
<-executor.WaitForShutdown()
7974
}
8075
}()
8176

8277
return signal
8378
}
8479

8580
type commandExecutor[C any, R any] struct {
86-
command func(context.Context, C) (R, error)
87-
drainAndBacklogSig chan Signaler
88-
resumeBackloggedSig Signaler
89-
cmdChan chan Command[C, R]
81+
command func(context.Context, C) (R, error)
82+
cmdChan chan Command[C, R]
9083
// backlogChan contains all the commands that failed with EOF, waiting to be retried.
9184
backlogChan chan Command[C, R]
9285
// inflightCmds keeps track of the number of commands that are currently being executed.
93-
inflightCmds sync.WaitGroup
94-
executeSig Signaler
95-
shutdownCompleteSig Signaler
96-
errBuff ErrorBuffer
86+
inflightCmds sync.WaitGroup
87+
88+
resumeBackloggedSig Signaler
89+
drainAndBacklogSig chan Signaler
90+
shutdownCompleteSig chan struct{}
91+
92+
errBuff ErrorBuffer
9793

9894
backLogCommands bool
9995
backlog []Command[C, R]
@@ -119,9 +115,7 @@ func NewCommandExecutor[C any, R any](ctx context.Context, errBuff ErrorBuffer,
119115
backlogChan: make(chan Command[C, R], 100),
120116
drainAndBacklogSig: make(chan Signaler, 100),
121117
resumeBackloggedSig: NewSignaler(),
122-
123-
executeSig: NewSignaler(),
124-
shutdownCompleteSig: NewSignaler(),
118+
shutdownCompleteSig: make(chan struct{}),
125119
}
126120

127121
go executor.loop(ctx)
@@ -135,7 +129,7 @@ func (executor *commandExecutor[C, R]) loop(shutdownCtx context.Context) {
135129
// doesn't wait on the channel provided.
136130
defer func() {
137131
defer stopCommands()
138-
defer executor.shutdownCompleteSig.Close()
132+
defer close(executor.shutdownCompleteSig)
139133

140134
// close the cmdChan in case anything tries to write to it. This will ensure a panic occurs while trying to
141135
// clean up any outstanding cmd.
@@ -150,13 +144,16 @@ func (executor *commandExecutor[C, R]) loop(shutdownCtx context.Context) {
150144
executor.drainBacklogChannel()
151145
executor.backlog = append(executor.backlog, ReadAll(executor.cmdChan)...)
152146

153-
logrus.Debug("Returning errors for outstanding requests due to shutdown.")
154-
for _, cmd := range executor.backlog {
155-
cmd.ReturnError(context.Canceled)
147+
if len(executor.backlog) > 0 {
148+
logrus.Debug("Returning errors for outstanding commands due to shutdown...")
149+
for _, cmd := range executor.backlog {
150+
cmd.ReturnError(context.Canceled)
151+
}
152+
logrus.Debug("Finished returning errors for outstanding commands.")
153+
} else {
154+
logrus.Debug("No outstanding commands, shutting down...")
156155
}
157-
logrus.Debug("Finished returning errors for outstanding requests due to shutdown.")
158156

159-
executor.executeSig.Close()
160157
close(executor.drainAndBacklogSig)
161158
executor.resumeBackloggedSig.Close()
162159
}()
@@ -179,6 +176,9 @@ func (executor *commandExecutor[C, R]) loop(shutdownCtx context.Context) {
179176
}
180177
case cmd := <-executor.backlogChan:
181178
logrus.Debugf("Received backlog command (current backlog size: %d).", len(executor.backlog))
179+
if len(executor.backlog) > 50 {
180+
logrus.Warning("Backlog size has exceeded 50.")
181+
}
182182
executor.backlog = append(executor.backlog, cmd)
183183
case signal := <-executor.drainAndBacklogSig:
184184
logrus.Debugf("Received requeue signal.")
@@ -250,6 +250,7 @@ func (executor *commandExecutor[C, R]) executeCommand(ctx context.Context, req C
250250
defer executor.inflightCmds.Done()
251251
result, err := executor.command(ctx, req.Get())
252252
if err != nil {
253+
logrus.Debugf("Error executing command: %v", err)
253254
executor.errBuff.Write(err)
254255
if errors.Is(err, io.EOF) || errors.Is(err, context.Canceled) {
255256
executor.backlogChan <- req
@@ -287,6 +288,6 @@ func (executor *commandExecutor[Req, Resp]) Resume() {
287288
executor.resumeBackloggedSig.Send()
288289
}
289290

290-
func (executor *commandExecutor[C, R]) ShutdownSignaler() Signaler {
291+
func (executor *commandExecutor[C, R]) WaitForShutdown() <-chan struct{} {
291292
return executor.shutdownCompleteSig
292293
}

guardian/pkg/config/config.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,11 @@ type Config struct {
5959

6060
K8sEndpoint string `default:"https://kubernetes.default" split_words:"true"`
6161

62-
TunnelDialRetryAttempts int `default:"20" split_words:"true"`
62+
// TunnelDialRetryAttempts is the number of times the tunnel dialer should retry before failing.
63+
// -1 means dial indefinitely.
64+
TunnelDialRetryAttempts int `default:"-1" split_words:"true"`
6365
TunnelDialRetryInterval time.Duration `default:"5s" split_words:"true"`
64-
TunnelDialTimeout time.Duration `default:"60s" split_words:"true"`
66+
TunnelDialTimeout time.Duration `default:"10s" split_words:"true"`
6567

6668
TunnelDialRecreateOnTunnelClose bool `default:"true" split_words:"true"`
6769
ConnectionRetryAttempts int `default:"25" split_words:"true"`

guardian/pkg/daemon/daemon.go

+4-23
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,7 @@ package daemon
1616

1717
import (
1818
"context"
19-
"os"
20-
"os/signal"
2119
"sync"
22-
"syscall"
2320
"time"
2421

2522
"github.com/sirupsen/logrus"
@@ -30,10 +27,11 @@ import (
3027
)
3128

3229
// Run starts the daemon, which configures and starts the services needed for guardian to run.
33-
func Run(cfg config.Config, proxyTargets []server.Target) {
30+
func Run(ctx context.Context, cfg config.Config, proxyTargets []server.Target) {
3431
tunnelDialOpts := []tunnel.DialerOption{
3532
tunnel.WithDialerRetryInterval(cfg.TunnelDialRetryInterval),
3633
tunnel.WithDialerTimeout(cfg.TunnelDialTimeout),
34+
tunnel.WithDialerRetryAttempts(cfg.TunnelDialRetryAttempts),
3735
tunnel.WithDialerKeepAliveSettings(cfg.KeepAliveEnable, time.Duration(cfg.KeepAliveInterval)*time.Millisecond),
3836
}
3937

@@ -57,8 +55,6 @@ func Run(cfg config.Config, proxyTargets []server.Target) {
5755

5856
logrus.Infof("Using server name %s", tlsConfig.ServerName)
5957

60-
ctx := GetShutdownContext()
61-
6258
dialer, err := tunnel.NewTLSSessionDialer(cfg.VoltronURL, tlsConfig, tunnelDialOpts...)
6359
if err != nil {
6460
logrus.WithError(err).Fatal("Failed to create session dialer.")
@@ -87,7 +83,7 @@ func Run(cfg config.Config, proxyTargets []server.Target) {
8783
defer wg.Done()
8884
// Allow requests to come down from the management cluster.
8985
if err := srv.ListenAndServeManagementCluster(); err != nil {
90-
logrus.WithError(err).Fatal("Serving the tunnel exited.")
86+
logrus.WithError(err).Info("Serving the tunnel exited.")
9187
}
9288
}()
9389

@@ -98,7 +94,7 @@ func Run(cfg config.Config, proxyTargets []server.Target) {
9894
defer wg.Done()
9995

10096
if err := srv.ListenAndServeCluster(); err != nil {
101-
logrus.WithError(err).Fatal("proxy tunnel exited with an error")
97+
logrus.WithError(err).Info("proxy tunnel exited with an error")
10298
}
10399
}()
104100
}
@@ -107,18 +103,3 @@ func Run(cfg config.Config, proxyTargets []server.Target) {
107103
logrus.WithError(err).Fatal("proxy tunnel exited with an error")
108104
}
109105
}
110-
111-
// GetShutdownContext creates a context that's done when either syscall.SIGINT or syscall.SIGTERM notified.
112-
func GetShutdownContext() context.Context {
113-
signalChan := make(chan os.Signal, 1)
114-
signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
115-
116-
ctx, cancel := context.WithCancel(context.Background())
117-
go func() {
118-
<-signalChan
119-
logrus.Debug("Shutdown signal received, shutting down.")
120-
cancel()
121-
}()
122-
123-
return ctx
124-
}

guardian/pkg/server/server.go

+24-10
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
calicotls "github.com/projectcalico/calico/crypto/pkg/tls"
2828
"github.com/projectcalico/calico/guardian/pkg/conn"
2929
"github.com/projectcalico/calico/guardian/pkg/tunnel"
30+
"github.com/projectcalico/calico/lib/std/chanutil"
3031
)
3132

3233
// Server represents a server interface with methods for cluster and management cluster operations and graceful shutdown.
@@ -51,18 +52,20 @@ type server struct {
5152
listenPort string
5253
listenHost string
5354

54-
shutdownCtx context.Context
55+
shutdownCtx context.Context
56+
preemptiveShutdown chan struct{}
5557
}
5658

5759
func New(shutdownCtx context.Context, tunnelCert *tls.Certificate, dialer tunnel.SessionDialer, opts ...Option) (Server, error) {
5860
var err error
5961
srv := &server{
60-
http: new(http.Server),
61-
shutdownCtx: shutdownCtx,
62-
connRetryAttempts: 5,
63-
connRetryInterval: 2 * time.Second,
64-
listenPort: "8080",
65-
tunnelCert: tunnelCert,
62+
http: new(http.Server),
63+
shutdownCtx: shutdownCtx,
64+
connRetryAttempts: 5,
65+
connRetryInterval: 2 * time.Second,
66+
listenPort: "8080",
67+
tunnelCert: tunnelCert,
68+
preemptiveShutdown: make(chan struct{}),
6669
}
6770

6871
for _, o := range opts {
@@ -93,6 +96,7 @@ func New(shutdownCtx context.Context, tunnelCert *tls.Certificate, dialer tunnel
9396
}
9497

9598
func (srv *server) ListenAndServeManagementCluster() error {
99+
defer close(srv.preemptiveShutdown)
96100
if err := srv.tunnel.Connect(srv.shutdownCtx); err != nil {
97101
return fmt.Errorf("failed to connect to tunnel: %w", err)
98102
}
@@ -117,6 +121,7 @@ func (srv *server) ListenAndServeManagementCluster() error {
117121
}
118122

119123
func (srv *server) ListenAndServeCluster() error {
124+
defer close(srv.preemptiveShutdown)
120125
logrus.Infof("Listening on %s:%s for connections to proxy to voltron", srv.listenHost, srv.listenPort)
121126
if err := srv.tunnel.Connect(srv.shutdownCtx); err != nil {
122127
return fmt.Errorf("failed to connect to tunnel: %w", err)
@@ -153,14 +158,23 @@ func (srv *server) ListenAndServeCluster() error {
153158
}
154159

155160
func (srv *server) WaitForShutdown() error {
156-
<-srv.shutdownCtx.Done()
157-
logrus.Info("Received shutdown signal, shutting server down.")
161+
// TODO might just want to wrap the context we have.
162+
select {
163+
case <-srv.preemptiveShutdown:
164+
logrus.Info("Received preemptive shutdown signal, shutting server down.")
165+
case <-srv.shutdownCtx.Done():
166+
logrus.Info("Received shutdown signal, shutting server down.")
167+
}
158168

159169
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
160170
defer cancel()
171+
172+
logrus.Info("Waiting for tunnel to close...")
173+
chanutil.Read(ctx, srv.tunnel.WaitForClose())
174+
logrus.Info("Tunnel is closed.")
175+
161176
err := srv.http.Shutdown(ctx)
162177
logrus.Info("Server shutdown complete.")
163-
164178
return err
165179
}
166180

guardian/pkg/thirdpartymocks/net/Conn.go

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)