Skip to content

Commit a4fc6f9

Browse files
lisongminsqueed
authored andcommitted
feat(dhcp): Cancel backoff retry on stop
Signed-off-by: Songmin Li <[email protected]>
1 parent d61e7e5 commit a4fc6f9

File tree

5 files changed

+72
-51
lines changed

5 files changed

+72
-51
lines changed

plugins/ipam/dhcp/daemon.go

+16-13
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,21 @@ import (
3939
var errNoMoreTries = errors.New("no more tries")
4040

4141
type DHCP struct {
42-
mux sync.Mutex
43-
leases map[string]*DHCPLease
44-
hostNetnsPrefix string
45-
clientTimeout time.Duration
46-
clientResendMax time.Duration
47-
broadcast bool
42+
mux sync.Mutex
43+
leases map[string]*DHCPLease
44+
hostNetnsPrefix string
45+
clientTimeout time.Duration
46+
clientResendMax time.Duration
47+
clientResendTimeout time.Duration
48+
broadcast bool
4849
}
4950

50-
func newDHCP(clientTimeout, clientResendMax time.Duration) *DHCP {
51+
func newDHCP(clientTimeout, clientResendMax time.Duration, resendTimeout time.Duration) *DHCP {
5152
return &DHCP{
52-
leases: make(map[string]*DHCPLease),
53-
clientTimeout: clientTimeout,
54-
clientResendMax: clientResendMax,
53+
leases: make(map[string]*DHCPLease),
54+
clientTimeout: clientTimeout,
55+
clientResendMax: clientResendMax,
56+
clientResendTimeout: resendTimeout,
5557
}
5658
}
5759

@@ -90,7 +92,7 @@ func (d *DHCP) Allocate(args *skel.CmdArgs, result *current.Result) error {
9092
hostNetns := d.hostNetnsPrefix + args.Netns
9193
l, err = AcquireLease(clientID, hostNetns, args.IfName,
9294
opts,
93-
d.clientTimeout, d.clientResendMax, d.broadcast)
95+
d.clientTimeout, d.clientResendMax, d.clientResendTimeout, d.broadcast)
9496
if err != nil {
9597
return err
9698
}
@@ -190,7 +192,8 @@ func getListener(socketPath string) (net.Listener, error) {
190192

191193
func runDaemon(
192194
pidfilePath, hostPrefix, socketPath string,
193-
dhcpClientTimeout time.Duration, resendMax time.Duration, broadcast bool,
195+
dhcpClientTimeout time.Duration, resendMax time.Duration, resendTimeout time.Duration,
196+
broadcast bool,
194197
) error {
195198
// since other goroutines (on separate threads) will change namespaces,
196199
// ensure the RPC server does not get scheduled onto those
@@ -225,7 +228,7 @@ func runDaemon(
225228
done <- true
226229
}()
227230

228-
dhcp := newDHCP(dhcpClientTimeout, resendMax)
231+
dhcp := newDHCP(dhcpClientTimeout, resendMax, resendTimeout)
229232
dhcp.hostNetnsPrefix = hostPrefix
230233
dhcp.broadcast = broadcast
231234
rpc.Register(dhcp)

plugins/ipam/dhcp/dhcp2_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ var _ = Describe("DHCP Multiple Lease Operations", func() {
6666
// Start the DHCP client daemon
6767
dhcpPluginPath, err := exec.LookPath("dhcp")
6868
Expect(err).NotTo(HaveOccurred())
69-
clientCmd = exec.Command(dhcpPluginPath, "daemon", "-socketpath", socketPath)
69+
clientCmd = exec.Command(dhcpPluginPath, "daemon", "-socketpath", socketPath, "--timeout", "2s", "--resendtimeout", "8s")
7070
err = clientCmd.Start()
7171
Expect(err).NotTo(HaveOccurred())
7272
Expect(clientCmd.Process).NotTo(BeNil())

plugins/ipam/dhcp/dhcp_test.go

+16-7
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,25 @@ func getTmpDir() (string, error) {
4545
}
4646

4747
type DhcpServer struct {
48-
cmd *exec.Cmd
48+
cmd *exec.Cmd
49+
lock sync.Mutex
4950

5051
startAddr net.IP
5152
endAddr net.IP
5253
leaseTime time.Duration
5354
}
5455

5556
func (s *DhcpServer) Serve() error {
57+
if err := s.Start(); err != nil {
58+
return err
59+
}
60+
return s.cmd.Wait()
61+
}
62+
63+
func (s *DhcpServer) Start() error {
64+
s.lock.Lock()
65+
defer s.lock.Unlock()
66+
5667
s.cmd = exec.Command(
5768
"dnsmasq",
5869
"--no-daemon",
@@ -69,11 +80,9 @@ func (s *DhcpServer) Serve() error {
6980
}
7081

7182
func (s *DhcpServer) Stop() error {
72-
if err := s.cmd.Process.Kill(); err != nil {
73-
return err
74-
}
75-
_, err := s.cmd.Process.Wait()
76-
return err
83+
s.lock.Lock()
84+
defer s.lock.Unlock()
85+
return s.cmd.Process.Kill()
7786
}
7887

7988
func dhcpServerStart(netns ns.NetNS, numLeases int, stopCh <-chan bool) *sync.WaitGroup {
@@ -535,7 +544,7 @@ var _ = Describe("DHCP Lease Unavailable Operations", func() {
535544
// `go test` timeout with default delays. Since our DHCP server
536545
// and client daemon are local processes anyway, we can depend on
537546
// them to respond very quickly.
538-
clientCmd = exec.Command(dhcpPluginPath, "daemon", "-socketpath", socketPath, "-timeout", "2s", "-resendmax", "8s")
547+
clientCmd = exec.Command(dhcpPluginPath, "daemon", "-socketpath", socketPath, "-timeout", "2s", "-resendmax", "8s", "--resendtimeout", "10s")
539548

540549
// copy dhcp client's stdout/stderr to test stdout
541550
var b bytes.Buffer

plugins/ipam/dhcp/lease.go

+34-27
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,10 @@ import (
3636
// RFC 2131 suggests using exponential backoff, starting with 4sec
3737
// and randomized to +/- 1sec
3838
const (
39-
resendDelay0 = 4 * time.Second
40-
resendDelayMax = 62 * time.Second
41-
defaultLeaseTime = 60 * time.Minute
39+
resendDelay0 = 4 * time.Second
40+
resendDelayMax = 62 * time.Second
41+
defaultLeaseTime = 60 * time.Minute
42+
defaultResendTimeout = 208 * time.Second // fast resend + backoff resend
4243
)
4344

4445
// To speed up the retry for first few failures, we retry without
@@ -69,6 +70,7 @@ type DHCPLease struct {
6970
expireTime time.Time
7071
timeout time.Duration
7172
resendMax time.Duration
73+
resendTimeout time.Duration
7274
broadcast bool
7375
stopping uint32
7476
stop chan struct{}
@@ -155,23 +157,24 @@ func prepareOptions(cniArgs string, provideOptions []ProvideOption, requestOptio
155157
func AcquireLease(
156158
clientID, netns, ifName string,
157159
opts []dhcp4.Option,
158-
timeout, resendMax time.Duration, broadcast bool,
160+
timeout, resendMax time.Duration, resendTimeout time.Duration, broadcast bool,
159161
) (*DHCPLease, error) {
160162
errCh := make(chan error, 1)
161163

162164
ctx := context.Background()
163165
ctx, cancel := context.WithCancel(ctx)
164166

165167
l := &DHCPLease{
166-
clientID: clientID,
167-
stop: make(chan struct{}),
168-
check: make(chan struct{}),
169-
timeout: timeout,
170-
resendMax: resendMax,
171-
broadcast: broadcast,
172-
opts: opts,
173-
cancelFunc: cancel,
174-
ctx: ctx,
168+
clientID: clientID,
169+
stop: make(chan struct{}),
170+
check: make(chan struct{}),
171+
timeout: timeout,
172+
resendMax: resendMax,
173+
resendTimeout: resendTimeout,
174+
broadcast: broadcast,
175+
opts: opts,
176+
cancelFunc: cancel,
177+
ctx: ctx,
175178
}
176179

177180
log.Printf("%v: acquiring lease", clientID)
@@ -213,6 +216,7 @@ func AcquireLease(
213216
func (l *DHCPLease) Stop() {
214217
if atomic.CompareAndSwapUint32(&l.stopping, 0, 1) {
215218
close(l.stop)
219+
l.cancelFunc()
216220
}
217221
l.wg.Wait()
218222
}
@@ -251,9 +255,11 @@ func (l *DHCPLease) acquire() error {
251255
}
252256
defer c.Close()
253257

254-
pkt, err := backoffRetry(l.resendMax, func() (*nclient4.Lease, error) {
258+
timeoutCtx, cancel := context.WithTimeoutCause(l.ctx, l.resendTimeout, errNoMoreTries)
259+
defer cancel()
260+
pkt, err := backoffRetry(timeoutCtx, l.resendMax, func() (*nclient4.Lease, error) {
255261
return c.Request(
256-
l.ctx,
262+
timeoutCtx,
257263
withClientID(l.clientID),
258264
withAllOptions(l),
259265
)
@@ -351,9 +357,11 @@ func (l *DHCPLease) renew() error {
351357
}
352358
defer c.Close()
353359

354-
lease, err := backoffRetry(l.resendMax, func() (*nclient4.Lease, error) {
360+
timeoutCtx, cancel := context.WithTimeoutCause(l.ctx, l.resendTimeout, errNoMoreTries)
361+
defer cancel()
362+
lease, err := backoffRetry(timeoutCtx, l.resendMax, func() (*nclient4.Lease, error) {
355363
return c.Renew(
356-
l.ctx,
364+
timeoutCtx,
357365
l.latestLease,
358366
withClientID(l.clientID),
359367
withAllOptions(l),
@@ -441,7 +449,7 @@ func jitter(span time.Duration) time.Duration {
441449
return time.Duration(float64(span) * (2.0*rand.Float64() - 1.0))
442450
}
443451

444-
func backoffRetry(resendMax time.Duration, f func() (*nclient4.Lease, error)) (*nclient4.Lease, error) {
452+
func backoffRetry(ctx context.Context, resendMax time.Duration, f func() (*nclient4.Lease, error)) (*nclient4.Lease, error) {
445453
baseDelay := resendDelay0
446454
var sleepTime time.Duration
447455
fastRetryLimit := resendFastMax
@@ -462,17 +470,16 @@ func backoffRetry(resendMax time.Duration, f func() (*nclient4.Lease, error)) (*
462470

463471
log.Printf("retrying in %f seconds", sleepTime.Seconds())
464472

465-
time.Sleep(sleepTime)
466-
467-
// only adjust delay time if we are in normal backoff stage
468-
if baseDelay < resendMax && fastRetryLimit == 0 {
469-
baseDelay *= 2
470-
} else if fastRetryLimit == 0 { // only break if we are at normal delay
471-
break
473+
select {
474+
case <-ctx.Done():
475+
return nil, context.Cause(ctx)
476+
case <-time.After(sleepTime):
477+
// only adjust delay time if we are in normal backoff stage
478+
if baseDelay < resendMax && fastRetryLimit == 0 {
479+
baseDelay *= 2
480+
}
472481
}
473482
}
474-
475-
return nil, errNoMoreTries
476483
}
477484

478485
func newDHCPClient(

plugins/ipam/dhcp/main.go

+5-3
Original file line numberDiff line numberDiff line change
@@ -80,20 +80,22 @@ func main() {
8080
var broadcast bool
8181
var timeout time.Duration
8282
var resendMax time.Duration
83+
var resendTimeout time.Duration
8384
daemonFlags := flag.NewFlagSet("daemon", flag.ExitOnError)
8485
daemonFlags.StringVar(&pidfilePath, "pidfile", "", "optional path to write daemon PID to")
8586
daemonFlags.StringVar(&hostPrefix, "hostprefix", "", "optional prefix to host root")
8687
daemonFlags.StringVar(&socketPath, "socketpath", "", "optional dhcp server socketpath")
8788
daemonFlags.BoolVar(&broadcast, "broadcast", false, "broadcast DHCP leases")
88-
daemonFlags.DurationVar(&timeout, "timeout", 10*time.Second, "optional dhcp client timeout duration")
89-
daemonFlags.DurationVar(&resendMax, "resendmax", resendDelayMax, "optional dhcp client resend max duration")
89+
daemonFlags.DurationVar(&timeout, "timeout", 10*time.Second, "optional dhcp client timeout duration for each request")
90+
daemonFlags.DurationVar(&resendMax, "resendmax", resendDelayMax, "optional dhcp client max resend delay between requests")
91+
daemonFlags.DurationVar(&resendTimeout, "resendtimeout", defaultResendTimeout, "optional dhcp client resend timeout, no more retries after this timeout")
9092
daemonFlags.Parse(os.Args[2:])
9193

9294
if socketPath == "" {
9395
socketPath = defaultSocketPath
9496
}
9597

96-
if err := runDaemon(pidfilePath, hostPrefix, socketPath, timeout, resendMax, broadcast); err != nil {
98+
if err := runDaemon(pidfilePath, hostPrefix, socketPath, timeout, resendMax, resendTimeout, broadcast); err != nil {
9799
log.Print(err.Error())
98100
os.Exit(1)
99101
}

0 commit comments

Comments
 (0)