15
15
use std:: sync:: Arc ;
16
16
use std:: time:: Instant ;
17
17
18
+ use arc_swap:: ArcSwap ;
18
19
use itertools:: Itertools ;
19
20
use prometheus:: Registry ;
20
21
use risingwave_backup:: error:: BackupError ;
21
- use risingwave_backup:: storage:: MetaSnapshotStorageRef ;
22
+ use risingwave_backup:: storage:: { BoxedMetaSnapshotStorage , ObjectStoreMetaSnapshotStorage } ;
22
23
use risingwave_backup:: { MetaBackupJobId , MetaSnapshotId , MetaSnapshotManifest } ;
23
24
use risingwave_common:: bail;
24
25
use risingwave_hummock_sdk:: HummockSstableId ;
26
+ use risingwave_object_store:: object:: object_metrics:: ObjectStoreMetrics ;
27
+ use risingwave_object_store:: object:: parse_remote_object_store;
25
28
use risingwave_pb:: backup_service:: { BackupJobStatus , MetaBackupManifestId } ;
26
29
use risingwave_pb:: meta:: subscribe_response:: { Info , Operation } ;
27
30
use tokio:: task:: JoinHandle ;
28
31
29
32
use crate :: backup_restore:: meta_snapshot_builder:: MetaSnapshotBuilder ;
30
33
use crate :: backup_restore:: metrics:: BackupManagerMetrics ;
31
34
use crate :: hummock:: { HummockManagerRef , HummockVersionSafePoint } ;
32
- use crate :: manager:: { IdCategory , MetaSrvEnv } ;
35
+ use crate :: manager:: { IdCategory , LocalNotification , MetaSrvEnv } ;
33
36
use crate :: storage:: MetaStore ;
34
37
use crate :: MetaResult ;
35
38
@@ -57,40 +60,118 @@ impl BackupJobHandle {
57
60
}
58
61
59
62
pub type BackupManagerRef < S > = Arc < BackupManager < S > > ;
63
+ /// (url, dir)
64
+ type StoreConfig = ( String , String ) ;
60
65
61
66
/// `BackupManager` manages lifecycle of all existent backups and the running backup job.
///
/// The snapshot storage is kept behind an `ArcSwap` together with the `(url, dir)` config it
/// was created from, so that `set_store` can replace both at runtime.
62
67
pub struct BackupManager < S : MetaStore > {
63
68
/// Meta server environment; in this file it provides the notification manager and the id
/// generator.
env : MetaSrvEnv < S > ,
64
69
hummock_manager : HummockManagerRef < S > ,
65
- backup_store : MetaSnapshotStorageRef ,
70
/// Current snapshot storage paired with the `StoreConfig` (`(url, dir)`) used to build it.
+ backup_store : ArcSwap < ( BoxedMetaSnapshotStorage , StoreConfig ) > ,
66
71
/// Tracks the running backup job. Concurrent jobs are not supported.
67
72
running_backup_job : tokio:: sync:: Mutex < Option < BackupJobHandle > > ,
68
73
/// Backup metrics, built from the `Registry` passed at construction.
metrics : BackupManagerMetrics ,
69
74
}
70
75
71
76
impl < S : MetaStore > BackupManager < S > {
72
- pub fn new (
77
+ pub async fn new (
73
78
env : MetaSrvEnv < S > ,
74
79
hummock_manager : HummockManagerRef < S > ,
75
- backup_store : MetaSnapshotStorageRef ,
76
80
registry : Registry ,
81
+ store_url : & str ,
82
+ store_dir : & str ,
83
+ ) -> MetaResult < Arc < Self > > {
84
+ let store_config = ( store_url. to_string ( ) , store_dir. to_string ( ) ) ;
85
+ let store = create_snapshot_store ( & store_config) . await ?;
86
+ tracing:: info!(
87
+ "backup manager initialized: url={}, dir={}" ,
88
+ store_config. 0 ,
89
+ store_config. 1
90
+ ) ;
91
+ let instance = Arc :: new ( Self :: with_store (
92
+ env. clone ( ) ,
93
+ hummock_manager,
94
+ registry,
95
+ ( store, store_config) ,
96
+ ) ) ;
97
+ let ( local_notification_tx, mut local_notification_rx) =
98
+ tokio:: sync:: mpsc:: unbounded_channel ( ) ;
99
+ env. notification_manager ( )
100
+ . insert_local_sender ( local_notification_tx)
101
+ . await ;
102
+ let this = instance. clone ( ) ;
103
+ tokio:: spawn ( async move {
104
+ loop {
105
+ match local_notification_rx. recv ( ) . await {
106
+ Some ( notification) => {
107
+ if let LocalNotification :: SystemParamsChange ( p) = notification {
108
+ let new_config = (
109
+ p. backup_storage_url ( ) . to_string ( ) ,
110
+ p. backup_storage_directory ( ) . to_string ( ) ,
111
+ ) ;
112
+ this. handle_new_config ( new_config) . await ;
113
+ }
114
+ }
115
+ None => {
116
+ return ;
117
+ }
118
+ }
119
+ }
120
+ } ) ;
121
+ Ok ( instance)
122
+ }
123
+
124
+ async fn handle_new_config ( & self , new_config : StoreConfig ) {
125
+ if self . backup_store . load ( ) . 1 == new_config {
126
+ return ;
127
+ }
128
+ if let Err ( e) = self . set_store ( new_config. clone ( ) ) . await {
129
+ // Retry is driven by periodic system params notification.
130
+ tracing:: warn!(
131
+ "failed to apply new backup config: url={}, dir={}, {:#?}" ,
132
+ new_config. 0 ,
133
+ new_config. 1 ,
134
+ e
135
+ ) ;
136
+ }
137
+ }
138
+
139
+ fn with_store (
140
+ env : MetaSrvEnv < S > ,
141
+ hummock_manager : HummockManagerRef < S > ,
142
+ registry : Registry ,
143
+ backup_store : ( BoxedMetaSnapshotStorage , StoreConfig ) ,
77
144
) -> Self {
78
145
Self {
79
146
env,
80
147
hummock_manager,
81
- backup_store,
148
+ backup_store : ArcSwap :: from_pointee ( backup_store ) ,
82
149
running_backup_job : tokio:: sync:: Mutex :: new ( None ) ,
83
150
metrics : BackupManagerMetrics :: new ( registry) ,
84
151
}
85
152
}
86
153
154
+ pub async fn set_store ( & self , config : StoreConfig ) -> MetaResult < ( ) > {
155
+ let new_store = create_snapshot_store ( & config) . await ?;
156
+ tracing:: info!(
157
+ "new backup config is applied: url={}, dir={}" ,
158
+ config. 0 ,
159
+ config. 1
160
+ ) ;
161
+ self . backup_store . store ( Arc :: new ( ( new_store, config) ) ) ;
162
+ Ok ( ( ) )
163
+ }
164
+
87
165
/// Test-only constructor backed by `DummyMetaSnapshotStorage` and a default `StoreConfig`.
#[cfg(test)]
pub fn for_test(env: MetaSrvEnv<S>, hummock_manager: HummockManagerRef<S>) -> Self {
    let registry = Registry::new();
    let dummy_store: BoxedMetaSnapshotStorage =
        Box::<risingwave_backup::storage::DummyMetaSnapshotStorage>::default();
    let dummy_config = StoreConfig::default();
    Self::with_store(env, hummock_manager, registry, (dummy_store, dummy_config))
}
96
177
@@ -104,6 +185,26 @@ impl<S: MetaStore> BackupManager<S> {
104
185
job. job_id
105
186
) ) ;
106
187
}
188
+ // The reasons to limit number of meta snapshot are:
189
+ // 1. limit size of `MetaSnapshotManifest`, which is kept in memory by
190
+ // `ObjectStoreMetaSnapshotStorage`.
191
+ // 2. limit number of pinned SSTs returned by
192
+ // `list_pinned_ssts`, which subsequently is used by GC.
193
+ const MAX_META_SNAPSHOT_NUM : usize = 100 ;
194
+ let current_number = self
195
+ . backup_store
196
+ . load ( )
197
+ . 0
198
+ . manifest ( )
199
+ . snapshot_metadata
200
+ . len ( ) ;
201
+ if current_number > MAX_META_SNAPSHOT_NUM {
202
+ bail ! ( format!(
203
+ "too many existent meta snapshots, expect at most {}" ,
204
+ MAX_META_SNAPSHOT_NUM
205
+ ) )
206
+ }
207
+
107
208
let job_id = self
108
209
. env
109
210
. id_gen_manager ( )
@@ -134,6 +235,8 @@ impl<S: MetaStore> BackupManager<S> {
134
235
}
135
236
if self
136
237
. backup_store
238
+ . load ( )
239
+ . 0
137
240
. manifest ( )
138
241
. snapshot_metadata
139
242
. iter ( )
@@ -160,7 +263,7 @@ impl<S: MetaStore> BackupManager<S> {
160
263
. notify_hummock_without_version (
161
264
Operation :: Update ,
162
265
Info :: MetaBackupManifestId ( MetaBackupManifestId {
163
- id : self . backup_store . manifest ( ) . manifest_id ,
266
+ id : self . backup_store . load ( ) . 0 . manifest ( ) . manifest_id ,
164
267
} ) ,
165
268
) ;
166
269
}
@@ -188,13 +291,13 @@ impl<S: MetaStore> BackupManager<S> {
188
291
189
292
/// Deletes existent backups from backup storage.
190
293
pub async fn delete_backups ( & self , ids : & [ MetaSnapshotId ] ) -> MetaResult < ( ) > {
191
- self . backup_store . delete ( ids) . await ?;
294
+ self . backup_store . load ( ) . 0 . delete ( ids) . await ?;
192
295
self . env
193
296
. notification_manager ( )
194
297
. notify_hummock_without_version (
195
298
Operation :: Update ,
196
299
Info :: MetaBackupManifestId ( MetaBackupManifestId {
197
- id : self . backup_store . manifest ( ) . manifest_id ,
300
+ id : self . backup_store . load ( ) . 0 . manifest ( ) . manifest_id ,
198
301
} ) ,
199
302
) ;
200
303
Ok ( ( ) )
@@ -203,6 +306,8 @@ impl<S: MetaStore> BackupManager<S> {
203
306
/// List all `SSTables` required by backups.
204
307
pub fn list_pinned_ssts ( & self ) -> Vec < HummockSstableId > {
205
308
self . backup_store
309
+ . load ( )
310
+ . 0
206
311
. manifest ( )
207
312
. snapshot_metadata
208
313
. iter ( )
@@ -212,7 +317,7 @@ impl<S: MetaStore> BackupManager<S> {
212
317
}
213
318
214
319
/// Returns the manifest reported by the backup store that is current at call time.
pub fn manifest(&self) -> Arc<MetaSnapshotManifest> {
    let current = self.backup_store.load();
    current.0.manifest()
}
217
322
}
218
323
@@ -234,7 +339,12 @@ impl<S: MetaStore> BackupWorker<S> {
234
339
// Reuse job id as snapshot id.
235
340
snapshot_builder. build ( job_id) . await ?;
236
341
let snapshot = snapshot_builder. finish ( ) ?;
237
- backup_manager_clone. backup_store . create ( & snapshot) . await ?;
342
+ backup_manager_clone
343
+ . backup_store
344
+ . load ( )
345
+ . 0
346
+ . create ( & snapshot)
347
+ . await ?;
238
348
Ok ( BackupJobResult :: Succeeded )
239
349
} ;
240
350
tokio:: spawn ( async move {
@@ -245,3 +355,16 @@ impl<S: MetaStore> BackupWorker<S> {
245
355
} )
246
356
}
247
357
}
358
+
359
+ async fn create_snapshot_store ( config : & StoreConfig ) -> MetaResult < BoxedMetaSnapshotStorage > {
360
+ let object_store = Arc :: new (
361
+ parse_remote_object_store (
362
+ & config. 0 ,
363
+ Arc :: new ( ObjectStoreMetrics :: unused ( ) ) ,
364
+ "Meta Backup" ,
365
+ )
366
+ . await ,
367
+ ) ;
368
+ let store = ObjectStoreMetaSnapshotStorage :: new ( & config. 1 , object_store) . await ?;
369
+ Ok ( Box :: new ( store) )
370
+ }
0 commit comments