Skip to content

Commit ad114f8

Browse files
Merge pull request #6723 from sseago/restore-get-perf
2 parents 84734f1 + 7750e12 commit ad114f8

File tree

14 files changed

+243
-34
lines changed

14 files changed

+243
-34
lines changed

changelogs/unreleased/6723-sseago

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Perf improvements for existing resource restore

pkg/client/dynamic.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@ package client
1818

1919
import (
2020
"context"
21+
"time"
2122

2223
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2324
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2425
"k8s.io/apimachinery/pkg/runtime/schema"
2526
"k8s.io/apimachinery/pkg/types"
2627
"k8s.io/apimachinery/pkg/watch"
2728
"k8s.io/client-go/dynamic"
29+
"k8s.io/client-go/dynamic/dynamicinformer"
2830
)
2931

3032
// DynamicFactory contains methods for retrieving dynamic clients for GroupVersionResources and
@@ -33,6 +35,8 @@ type DynamicFactory interface {
3335
// ClientForGroupVersionResource returns a Dynamic client for the given group/version
3436
// and resource for the given namespace.
3537
ClientForGroupVersionResource(gv schema.GroupVersion, resource metav1.APIResource, namespace string) (Dynamic, error)
38+
// DynamicSharedInformerFactoryForNamespace returns a DynamicSharedInformerFactory for the given namespace.
39+
DynamicSharedInformerFactoryForNamespace(namespace string) dynamicinformer.DynamicSharedInformerFactory
3640
}
3741

3842
// dynamicFactory implements DynamicFactory.
@@ -51,6 +55,10 @@ func (f *dynamicFactory) ClientForGroupVersionResource(gv schema.GroupVersion, r
5155
}, nil
5256
}
5357

58+
// DynamicSharedInformerFactoryForNamespace implements DynamicFactory. It builds a
// new DynamicSharedInformerFactory on top of f.dynamicClient, scoped to the given
// namespace. time.Minute is passed as the factory's default resync period and no
// list-option tweak function is supplied (nil).
func (f *dynamicFactory) DynamicSharedInformerFactoryForNamespace(namespace string) dynamicinformer.DynamicSharedInformerFactory {
59+
return dynamicinformer.NewFilteredDynamicSharedInformerFactory(f.dynamicClient, time.Minute, namespace, nil)
60+
}
61+
5462
// Creator creates an object.
5563
type Creator interface {
5664
// Create creates an object.

pkg/cmd/cli/install/install.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ type Options struct {
8181
DefaultVolumesToFsBackup bool
8282
UploaderType string
8383
DefaultSnapshotMoveData bool
84+
DisableInformerCache bool
8485
}
8586

8687
// BindFlags adds command line values to the options struct.
@@ -122,6 +123,7 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) {
122123
flags.BoolVar(&o.DefaultVolumesToFsBackup, "default-volumes-to-fs-backup", o.DefaultVolumesToFsBackup, "Bool flag to configure Velero server to use pod volume file system backup by default for all volumes on all backups. Optional.")
123124
flags.StringVar(&o.UploaderType, "uploader-type", o.UploaderType, fmt.Sprintf("The type of uploader to transfer the data of pod volumes, the supported values are '%s', '%s'", uploader.ResticType, uploader.KopiaType))
124125
flags.BoolVar(&o.DefaultSnapshotMoveData, "default-snapshot-move-data", o.DefaultSnapshotMoveData, "Bool flag to configure Velero server to move data by default for all snapshots supporting data movement. Optional.")
126+
flags.BoolVar(&o.DisableInformerCache, "disable-informer-cache", o.DisableInformerCache, "Disable informer cache for Get calls on restore. With the informer cache enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (don't disable). Optional.")
125127
}
126128

127129
// NewInstallOptions instantiates a new, default InstallOptions struct.
@@ -149,6 +151,7 @@ func NewInstallOptions() *Options {
149151
DefaultVolumesToFsBackup: false,
150152
UploaderType: uploader.KopiaType,
151153
DefaultSnapshotMoveData: false,
154+
DisableInformerCache: false,
152155
}
153156
}
154157

@@ -213,6 +216,7 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) {
213216
DefaultVolumesToFsBackup: o.DefaultVolumesToFsBackup,
214217
UploaderType: o.UploaderType,
215218
DefaultSnapshotMoveData: o.DefaultSnapshotMoveData,
219+
DisableInformerCache: o.DisableInformerCache,
216220
}, nil
217221
}
218222

pkg/cmd/server/server.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ const (
112112
defaultCredentialsDirectory = "/tmp/credentials"
113113

114114
defaultMaxConcurrentK8SConnections = 30
115+
defaultDisableInformerCache = false
115116
)
116117

117118
type serverConfig struct {
@@ -136,6 +137,7 @@ type serverConfig struct {
136137
uploaderType string
137138
maxConcurrentK8SConnections int
138139
defaultSnapshotMoveData bool
140+
disableInformerCache bool
139141
}
140142

141143
func NewCommand(f client.Factory) *cobra.Command {
@@ -165,6 +167,7 @@ func NewCommand(f client.Factory) *cobra.Command {
165167
uploaderType: uploader.ResticType,
166168
maxConcurrentK8SConnections: defaultMaxConcurrentK8SConnections,
167169
defaultSnapshotMoveData: false,
170+
disableInformerCache: defaultDisableInformerCache,
168171
}
169172
)
170173

@@ -236,6 +239,7 @@ func NewCommand(f client.Factory) *cobra.Command {
236239
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.")
237240
command.Flags().IntVar(&config.maxConcurrentK8SConnections, "max-concurrent-k8s-connections", config.maxConcurrentK8SConnections, "Max concurrent connections number that Velero can create with kube-apiserver. Default is 30.")
238241
command.Flags().BoolVar(&config.defaultSnapshotMoveData, "default-snapshot-move-data", config.defaultSnapshotMoveData, "Move data by default for all snapshots supporting data movement.")
242+
command.Flags().BoolVar(&config.disableInformerCache, "disable-informer-cache", config.disableInformerCache, "Disable informer cache for Get calls on restore. With the informer cache enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (don't disable).")
239243

240244
return command
241245
}
@@ -936,6 +940,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
936940
s.metrics,
937941
s.config.formatFlag.Parse(),
938942
s.config.defaultItemOperationTimeout,
943+
s.config.disableInformerCache,
939944
)
940945

941946
if err = r.SetupWithManager(s.mgr); err != nil {

pkg/controller/restore_controller.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ type restoreReconciler struct {
101101
logFormat logging.Format
102102
clock clock.WithTickerAndDelayedExecution
103103
defaultItemOperationTimeout time.Duration
104+
disableInformerCache bool
104105

105106
newPluginManager func(logger logrus.FieldLogger) clientmgmt.Manager
106107
backupStoreGetter persistence.ObjectBackupStoreGetter
@@ -123,6 +124,7 @@ func NewRestoreReconciler(
123124
metrics *metrics.ServerMetrics,
124125
logFormat logging.Format,
125126
defaultItemOperationTimeout time.Duration,
127+
disableInformerCache bool,
126128
) *restoreReconciler {
127129
r := &restoreReconciler{
128130
ctx: ctx,
@@ -135,6 +137,7 @@ func NewRestoreReconciler(
135137
logFormat: logFormat,
136138
clock: &clock.RealClock{},
137139
defaultItemOperationTimeout: defaultItemOperationTimeout,
140+
disableInformerCache: disableInformerCache,
138141

139142
// use variables to refer to these functions so they can be
140143
// replaced with fakes for testing.
@@ -519,13 +522,14 @@ func (r *restoreReconciler) runValidatedRestore(restore *api.Restore, info backu
519522
}
520523

521524
restoreReq := &pkgrestore.Request{
522-
Log: restoreLog,
523-
Restore: restore,
524-
Backup: info.backup,
525-
PodVolumeBackups: podVolumeBackups,
526-
VolumeSnapshots: volumeSnapshots,
527-
BackupReader: backupFile,
528-
ResourceModifiers: resourceModifiers,
525+
Log: restoreLog,
526+
Restore: restore,
527+
Backup: info.backup,
528+
PodVolumeBackups: podVolumeBackups,
529+
VolumeSnapshots: volumeSnapshots,
530+
BackupReader: backupFile,
531+
ResourceModifiers: resourceModifiers,
532+
DisableInformerCache: r.disableInformerCache,
529533
}
530534
restoreWarnings, restoreErrors := r.restorer.RestoreWithResolvers(restoreReq, actionsResolver, pluginManager)
531535

pkg/controller/restore_controller_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ func TestFetchBackupInfo(t *testing.T) {
114114
metrics.NewServerMetrics(),
115115
formatFlag,
116116
60*time.Minute,
117+
false,
117118
)
118119

119120
if test.backupStoreError == nil {
@@ -191,6 +192,7 @@ func TestProcessQueueItemSkips(t *testing.T) {
191192
metrics.NewServerMetrics(),
192193
formatFlag,
193194
60*time.Minute,
195+
false,
194196
)
195197

196198
_, err := r.Reconcile(context.Background(), ctrl.Request{NamespacedName: types.NamespacedName{
@@ -445,6 +447,7 @@ func TestRestoreReconcile(t *testing.T) {
445447
metrics.NewServerMetrics(),
446448
formatFlag,
447449
60*time.Minute,
450+
false,
448451
)
449452

450453
r.clock = clocktesting.NewFakeClock(now)
@@ -616,6 +619,7 @@ func TestValidateAndCompleteWhenScheduleNameSpecified(t *testing.T) {
616619
metrics.NewServerMetrics(),
617620
formatFlag,
618621
60*time.Minute,
622+
false,
619623
)
620624

621625
restore := &velerov1api.Restore{
@@ -708,6 +712,7 @@ func TestValidateAndCompleteWithResourceModifierSpecified(t *testing.T) {
708712
metrics.NewServerMetrics(),
709713
formatFlag,
710714
60*time.Minute,
715+
false,
711716
)
712717

713718
restore := &velerov1api.Restore{

pkg/install/deployment.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ type podTemplateConfig struct {
4848
uploaderType string
4949
defaultSnapshotMoveData bool
5050
privilegedNodeAgent bool
51+
disableInformerCache bool
5152
}
5253

5354
func WithImage(image string) podTemplateOption {
@@ -144,6 +145,12 @@ func WithDefaultSnapshotMoveData() podTemplateOption {
144145
}
145146
}
146147

148+
// WithDisableInformerCache returns a podTemplateOption that sets
// disableInformerCache to true on the podTemplateConfig. When that field is set,
// Deployment appends "--disable-informer-cache=true" to the container args.
func WithDisableInformerCache() podTemplateOption {
149+
return func(c *podTemplateConfig) {
150+
c.disableInformerCache = true
151+
}
152+
}
153+
147154
func WithServiceAccountName(sa string) podTemplateOption {
148155
return func(c *podTemplateConfig) {
149156
c.serviceAccountName = sa
@@ -185,6 +192,10 @@ func Deployment(namespace string, opts ...podTemplateOption) *appsv1.Deployment
185192
args = append(args, "--default-snapshot-move-data=true")
186193
}
187194

195+
if c.disableInformerCache {
196+
args = append(args, "--disable-informer-cache=true")
197+
}
198+
188199
if len(c.uploaderType) > 0 {
189200
args = append(args, fmt.Sprintf("--uploader-type=%s", c.uploaderType))
190201
}

pkg/install/deployment_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,8 @@ func TestDeployment(t *testing.T) {
6464

6565
deploy = Deployment("velero", WithServiceAccountName("test-sa"))
6666
assert.Equal(t, "test-sa", deploy.Spec.Template.Spec.ServiceAccountName)
67+
68+
deploy = Deployment("velero", WithDisableInformerCache())
69+
assert.Len(t, deploy.Spec.Template.Spec.Containers[0].Args, 2)
70+
assert.Equal(t, "--disable-informer-cache=true", deploy.Spec.Template.Spec.Containers[0].Args[1])
6771
}

pkg/install/resources.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ type VeleroOptions struct {
253253
DefaultVolumesToFsBackup bool
254254
UploaderType string
255255
DefaultSnapshotMoveData bool
256+
DisableInformerCache bool
256257
}
257258

258259
func AllCRDs() *unstructured.UnstructuredList {
@@ -357,6 +358,10 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList {
357358
deployOpts = append(deployOpts, WithDefaultSnapshotMoveData())
358359
}
359360

361+
if o.DisableInformerCache {
362+
deployOpts = append(deployOpts, WithDisableInformerCache())
363+
}
364+
360365
deploy := Deployment(o.Namespace, deployOpts...)
361366

362367
if err := appendUnstructured(resources, deploy); err != nil {

pkg/restore/request.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,15 @@ func resourceKey(obj runtime.Object) string {
5151
type Request struct {
5252
*velerov1api.Restore
5353

54-
Log logrus.FieldLogger
55-
Backup *velerov1api.Backup
56-
PodVolumeBackups []*velerov1api.PodVolumeBackup
57-
VolumeSnapshots []*volume.Snapshot
58-
BackupReader io.Reader
59-
RestoredItems map[itemKey]restoredItemStatus
60-
itemOperationsList *[]*itemoperation.RestoreOperation
61-
ResourceModifiers *resourcemodifiers.ResourceModifiers
54+
Log logrus.FieldLogger
55+
Backup *velerov1api.Backup
56+
PodVolumeBackups []*velerov1api.PodVolumeBackup
57+
VolumeSnapshots []*volume.Snapshot
58+
BackupReader io.Reader
59+
RestoredItems map[itemKey]restoredItemStatus
60+
itemOperationsList *[]*itemoperation.RestoreOperation
61+
ResourceModifiers *resourcemodifiers.ResourceModifiers
62+
DisableInformerCache bool
6263
}
6364

6465
type restoredItemStatus struct {

0 commit comments

Comments
 (0)