Skip to content

Commit 409990b

Browse files
committed
feat: Drop unnecessary listing for the sake of watch reinitialization
Signed-off-by: Antoni Zawodny <[email protected]>
1 parent 6b2984e commit 409990b

File tree

1 file changed

+16
-15
lines changed

1 file changed

+16
-15
lines changed

pkg/cache/cluster.go

+16 -15
Original file line number | Diff line number | Diff line change
@@ -605,6 +605,7 @@ func (c *clusterCache) loadInitialState(ctx context.Context, api kube.APIResourc
605605
}
606606

607607
func (c *clusterCache) watchEvents(ctx context.Context, api kube.APIResourceInfo, resClient dynamic.ResourceInterface, ns string, resourceVersion string) {
608+
timeoutSeconds := int64(c.watchResyncTimeout.Seconds())
608609
kube.RetryUntilSucceed(ctx, watchResourcesRetryTimeout, fmt.Sprintf("watch %s on %s", api.GroupKind, c.config.Host), c.log, func() (err error) {
609610
defer func() {
610611
if r := recover(); r != nil {
@@ -622,6 +623,7 @@ func (c *clusterCache) watchEvents(ctx context.Context, api kube.APIResourceInfo
622623

623624
w, err := watchutil.NewRetryWatcher(resourceVersion, &cache.ListWatch{
624625
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
626+
options.TimeoutSeconds = &timeoutSeconds
625627
res, err := resClient.Watch(ctx, options)
626628
if errors.IsNotFound(err) {
627629
c.stopWatching(api.GroupKind, ns)
@@ -633,30 +635,21 @@ func (c *clusterCache) watchEvents(ctx context.Context, api kube.APIResourceInfo
633635
return err
634636
}
635637

636-
defer func() {
637-
w.Stop()
638-
resourceVersion = ""
639-
}()
640-
641-
var watchResyncTimeoutCh <-chan time.Time
642-
if c.watchResyncTimeout > 0 {
643-
shouldResync := time.NewTimer(c.watchResyncTimeout)
644-
defer shouldResync.Stop()
645-
watchResyncTimeoutCh = shouldResync.C
646-
}
638+
defer w.Stop()
647639

648640
for {
649641
select {
650642
// stop watching when parent context got cancelled
651643
case <-ctx.Done():
652644
return nil
653645

654-
// re-synchronize API state and restart watch periodically
655-
case <-watchResyncTimeoutCh:
656-
return fmt.Errorf("Resyncing %s on %s due to timeout", api.GroupKind, c.config.Host)
657-
658646
// re-synchronize API state and restart watch if retry watcher failed to continue watching using provided resource version
659647
case <-w.Done():
648+
// The underlying retry watcher has stopped, possibly due to specifying an RV in
649+
// the watch request that is stale (error code 410). This forces us to relist
650+
// objects from the kube-apiserver to get a fresher RV and we invoke that relist
651+
// by resetting the locally stored RV.
652+
resourceVersion = ""
660653
return fmt.Errorf("Watch %s on %s has closed", api.GroupKind, c.config.Host)
661654

662655
case event, ok := <-w.ResultChan():
@@ -666,8 +659,16 @@ func (c *clusterCache) watchEvents(ctx context.Context, api kube.APIResourceInfo
666659

667660
obj, ok := event.Object.(*unstructured.Unstructured)
668661
if !ok {
662+
// We failed to cast the object received in the watch event to something
663+
// that contains a resource version field. Because of that, we don't know
664+
// from what RV we should reinitialize the watch connection, so in order to
665+
// avoid any inconsistencies due to accidental skipping of a potential RV,
666+
// we reset the locally stored RV to forcefully invoke the list API call to
667+
// get it from the kube-apiserver.
668+
resourceVersion = ""
669669
return fmt.Errorf("Failed to convert to *unstructured.Unstructured: %v", event.Object)
670670
}
671+
resourceVersion = obj.GetResourceVersion()
671672

672673
c.processEvent(event.Type, obj)
673674
if kube.IsCRD(obj) {

0 commit comments

Comments
 (0)