@@ -757,7 +757,7 @@ def getTreeHeight(self):
757
757
758
758
return maxChildHeight + 1
759
759
760
- def getAllLeaves (self ):
760
+ def getAllLeaves (self ) -> List [ "VDI" ] :
761
761
"Get all leaf nodes in the subtree rooted at self"
762
762
if len (self .children ) == 0 :
763
763
return [self ]
@@ -828,6 +828,57 @@ def _clear(self):
828
828
def _clearRef (self ):
829
829
self ._vdiRef = None
830
830
831
def _call_plug_cancel(self, hostRef):
    """Invoke the "commit_cancel" action of the on-slave plugin on `hostRef`.

    The plugin is handed this VDI's path and vdi_type. Whatever the plugin
    call returns is discarded.
    """
    plugin_args = {"path": self.path, "vdi_type": self.vdi_type}
    host_api = self.sr.xapi.session.xenapi.host
    host_api.call_plugin(
        hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", plugin_args)
835
+
836
def _call_plugin_coalesce(self, hostRef):
    """Invoke the "commit_tapdisk" action of the on-slave plugin on `hostRef`.

    The plugin is handed this VDI's path and vdi_type. Whatever the plugin
    call returns is discarded.
    """
    plugin_args = {"path": self.path, "vdi_type": self.vdi_type}
    host_api = self.sr.xapi.session.xenapi.host
    host_api.call_plugin(
        hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", plugin_args)
840
+
841
def _doCoalesceOnHost(self, hostRef):
    """Coalesce this VDI onto its parent by asking `hostRef` to do the commit.

    Validates this VDI and its parent, grows the parent's virtual size to
    this VDI's, notifies the slaves of the resize, then runs the plugin
    coalesce call through Util.runAbortable. Afterwards the parent is
    validated again and its block info refreshed.
    """
    self.validate()
    self.parent.validate(True)
    self.parent._increaseSizeVirt(self.sizeVirt)
    self.sr._updateSlavesOnResize(self.parent)
    # TODO: We might need to make the LV RW on the slave directly for coalesce?
    # Children and parent need to be RW for QCOW2 coalesce, otherwise
    # tapdisk(libqcow) will crash trying to access them.

    def abortTest():
        """Return True when the coalesce should stop.

        An empty running file triggers a cancel on the host; a missing or
        unreadable running file stops the coalesce without a cancel call.
        """
        running_path = self.sr._gc_running_file(self)
        try:
            with open(running_path, "r") as handle:
                if handle.read():
                    # Marker still has content: keep going.
                    return False
                # TODO: Need to call commit cancel on the hostRef if we stop
                self._call_plug_cancel(hostRef)
                return True
        except OSError as err:
            if err.errno != errno.ENOENT:
                util.SMlog("IOError: {}".format(err))
            else:
                util.SMlog("File {} does not exist".format(running_path))
            return True

    def remote_commit():
        return self._call_plugin_coalesce(hostRef)

    Util.runAbortable(
        remote_commit, None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)

    self.parent.validate(True)
    # NOTE: content verification (_verifyContents(0)) is not run on this path.
    self.parent.updateBlockInfo()
871
+
872
def _isOpenOnHosts(self) -> Optional[str]:
    """Return the first host whose "is_openers" plugin answer is true.

    Every attached PBD's host is asked, via the on-slave plugin, about this
    VDI's path; the textual reply is interpreted with util.strtobool.
    Returns the host ref of the first positive answer, or None when no
    host answers positively.
    """
    for pbd in self.sr.xapi.getAttachedPBDs():
        host = pbd["host"]
        query = {"path": self.path}
        reply = self.sr.xapi.session.xenapi.host.call_plugin(
            host, XAPI.PLUGIN_ON_SLAVE, "is_openers", query)
        if util.strtobool(reply):
            return host
    return None
881
+
831
882
def _doCoalesce (self ) -> None :
832
883
"""Coalesce self onto parent. Only perform the actual coalescing of
833
884
an image, but not the subsequent relinking. We'll do that as the next step,
@@ -914,7 +965,7 @@ def coalesce(self) -> int:
914
965
return self .cowutil .coalesce (self .path )
915
966
916
967
@staticmethod
917
- def _doCoalesceCowImage (vdi ):
968
+ def _doCoalesceCowImage (vdi : "VDI" ):
918
969
try :
919
970
startTime = time .time ()
920
971
allocated_size = vdi .getAllocatedSize ()
@@ -943,7 +994,21 @@ def _vdi_is_raw(self, vdi_path):
943
994
944
995
def _coalesceCowImage (self , timeOut ):
945
996
Util .log (" Running COW coalesce on %s" % self )
946
- abortTest = lambda : IPCFlag (self .sr .uuid ).test (FLAG_TYPE_ABORT )
997
+ def abortTest ():
998
+ if self .cowutil .isCoalesceableOnRemote ():
999
+ file = self .sr ._gc_running_file (self )
1000
+ try :
1001
+ with open (file , "r" ) as f :
1002
+ if not f .read ():
1003
+ return True
1004
+ except OSError as e :
1005
+ if e .errno == errno .ENOENT :
1006
+ util .SMlog ("File {} does not exist" .format (file ))
1007
+ else :
1008
+ util .SMlog ("IOError: {}" .format (e ))
1009
+ return True
1010
+ return IPCFlag (self .sr .uuid ).test (FLAG_TYPE_ABORT )
1011
+
947
1012
try :
948
1013
util .fistpoint .activate_custom_fn (
949
1014
"cleanup_coalesceVHD_inject_failure" ,
@@ -2383,7 +2448,24 @@ def cleanupJournals(self, dryRun=False):
2383
2448
def cleanupCache (self , maxAge = - 1 ) -> int :
2384
2449
return 0
2385
2450
2386
- def _coalesce (self , vdi ):
2451
def _hasLeavesAttachedOn(self, vdi: VDI):
    """Look up the hosts on which leaves of `vdi`'s subtree are attached.

    Collects the UUID of every leaf under `vdi` and returns whatever
    util.get_hosts_attached_on reports for them (presumably a collection of
    host refs -- confirm against that helper).
    """
    leaf_uuids = [node.uuid for node in vdi.getAllLeaves()]
    return util.get_hosts_attached_on(self.xapi.session, leaf_uuids)
2455
+
2456
def _gc_running_file(self, vdi: VDI):
    """Return the path of the `gc_running_<vdi uuid>` marker for `vdi`.

    The marker lives in this SR's directory under NON_PERSISTENT_DIR.
    """
    marker_name = f"gc_running_{vdi.uuid}"
    sr_dir = os.path.join(NON_PERSISTENT_DIR, str(self.uuid))
    return os.path.join(sr_dir, marker_name)
2459
+
2460
def _create_running_file(self, vdi: VDI):
    """Write "1" into the GC running marker file for `vdi`.

    Any existing marker is truncated and rewritten.
    """
    marker_path = self._gc_running_file(vdi)
    with open(marker_path, "w") as marker:
        marker.write("1")
2463
+
2464
+ def _delete_running_file (self , vdi : VDI ):
2465
+ os .unlink (self ._gc_running_file (vdi ))
2466
+
2467
+ def _coalesce (self , vdi : VDI ):
2468
+ skipRelink = False
2387
2469
if self .journaler .get (vdi .JRN_RELINK , vdi .uuid ):
2388
2470
# this means we had done the actual coalescing already and just
2389
2471
# need to finish relinking and/or refreshing the children
@@ -2393,8 +2475,37 @@ def _coalesce(self, vdi):
2393
2475
# order to decide whether to abort the coalesce. We remove the
2394
2476
# journal as soon as the COW coalesce step is done, because we
2395
2477
# don't expect the rest of the process to take long
2478
+
2479
+ #TODO: Create `gc_running` in `/run/nonpersistent/sm/<sr uuid>/`
2480
+ if os .path .exists (self ._gc_running_file (vdi )):
2481
+ util .SMlog ("gc_running already exist for {}. Ignoring..." .format (self .uuid ))
2482
+
2483
+ self ._create_running_file (vdi )
2484
+
2396
2485
self .journaler .create (vdi .JRN_COALESCE , vdi .uuid , "1" )
2397
- vdi ._doCoalesce ()
2486
+ host_refs = self ._hasLeavesAttachedOn (vdi )
2487
+ #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time
2488
+ if len (host_refs ) > 1 :
2489
+ util .SMlog ("Not coalesceable, chain activated more than once" )
2490
+ raise Exception ("Not coalesceable, chain activated more than once" ) #TODO: Use correct error
2491
+
2492
+ try :
2493
+ if host_refs and vdi .cowutil .isCoalesceableOnRemote :
2494
+ #Leaf opened on another host, we need to call online coalesce
2495
+ util .SMlog ("DAMS: Remote coalesce for {}" .format (vdi .path ))
2496
+ vdi ._doCoalesceOnHost (list (host_refs )[0 ])
2497
+ skipRelink = True
2498
+ else :
2499
+ util .SMlog ("DAMS: Offline coalesce for {}" .format (vdi .path ))
2500
+ vdi ._doCoalesce ()
2501
+ except Exception as e :
2502
+ util .SMlog ("DAMS: EXCEPTION {}" .format (e ))
2503
+ self ._delete_running_file (vdi )
2504
+ raise
2505
+ """
2506
+ vdi._doCoalesce will call vdi._coalesceCowImage (after doing other things).
2507
+ It will then call VDI._doCoalesceCowImage in a runAbortable context
2508
+ """
2398
2509
self .journaler .remove (vdi .JRN_COALESCE , vdi .uuid )
2399
2510
2400
2511
util .fistpoint .activate ("LVHDRT_before_create_relink_journal" , self .uuid )
@@ -2403,19 +2514,22 @@ def _coalesce(self, vdi):
2403
2514
# like SM.clone from manipulating the VDIs we'll be relinking and
2404
2515
# rescan the SR first in case the children changed since the last
2405
2516
# scan
2406
- self .journaler .create (vdi .JRN_RELINK , vdi .uuid , "1" )
2517
+ if not skipRelink :
2518
+ self .journaler .create (vdi .JRN_RELINK , vdi .uuid , "1" )
2407
2519
2408
- self .lock ()
2409
- try :
2410
- vdi .parent ._tagChildrenForRelink ()
2411
- self .scan ()
2412
- vdi ._relinkSkip ()
2413
- finally :
2414
- self .unlock ()
2415
- # Reload the children to leave things consistent
2416
- vdi .parent ._reloadChildren (vdi )
2520
+ if not skipRelink :
2521
+ self .lock ()
2522
+ try :
2523
+ vdi .parent ._tagChildrenForRelink ()
2524
+ self .scan ()
2525
+ vdi ._relinkSkip ()
2526
+ finally :
2527
+ self .unlock ()
2528
+ # Reload the children to leave things consistent
2529
+ vdi .parent ._reloadChildren (vdi )
2530
+ self .journaler .remove (vdi .JRN_RELINK , vdi .uuid )
2417
2531
2418
- self .journaler . remove (vdi . JRN_RELINK , vdi . uuid )
2532
+ self ._delete_running_file (vdi )
2419
2533
self .deleteVDI (vdi )
2420
2534
2421
2535
class CoalesceTracker :
@@ -2655,6 +2769,7 @@ def _liveLeafCoalesce(self, vdi) -> bool:
2655
2769
try :
2656
2770
try :
2657
2771
# "vdi" object will no longer be valid after this call
2772
+ self ._create_running_file (vdi )
2658
2773
self ._doCoalesceLeaf (vdi )
2659
2774
except :
2660
2775
Util .logException ("_doCoalesceLeaf" )
@@ -2664,6 +2779,7 @@ def _liveLeafCoalesce(self, vdi) -> bool:
2664
2779
vdi = self .getVDI (uuid )
2665
2780
if vdi :
2666
2781
vdi .ensureUnpaused ()
2782
+ self ._delete_running_file (vdi )
2667
2783
vdiOld = self .getVDI (self .TMP_RENAME_PREFIX + uuid )
2668
2784
if vdiOld :
2669
2785
util .fistpoint .activate ("LVHDRT_coaleaf_before_delete" , self .uuid )
@@ -2675,7 +2791,7 @@ def _liveLeafCoalesce(self, vdi) -> bool:
2675
2791
self .logFilter .logState ()
2676
2792
return True
2677
2793
2678
- def _doCoalesceLeaf (self , vdi ):
2794
+ def _doCoalesceLeaf (self , vdi : VDI ):
2679
2795
"""Actual coalescing of a leaf VDI onto parent. Must be called in an
2680
2796
offline/atomic context"""
2681
2797
self .journaler .create (VDI .JRN_LEAF , vdi .uuid , vdi .parent .uuid )
@@ -3718,8 +3834,7 @@ def _gc_init_file(sr_uuid):
3718
3834
3719
3835
def _create_init_file(sr_uuid):
    """Create the GC init marker file for the SR `sr_uuid`.

    Ensures the SR's directory under NON_PERSISTENT_DIR exists, then writes
    "1" to the path returned by _gc_init_file(sr_uuid).
    """
    util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
    # _gc_init_file already returns the complete path; no join is needed.
    with open(_gc_init_file(sr_uuid), 'w+') as f:
        f.write('1')
3724
3839
3725
3840
@@ -3748,7 +3863,7 @@ def abortTest():
3748
3863
Util .log ("GC active, quiet period ended" )
3749
3864
3750
3865
3751
- def _gcLoop (sr , dryRun = False , immediate = False ):
3866
+ def _gcLoop (sr : SR , dryRun = False , immediate = False ):
3752
3867
if not lockGCActive .acquireNoblock ():
3753
3868
Util .log ("Another GC instance already active, exiting" )
3754
3869
return
0 commit comments