@@ -757,7 +757,7 @@ def getTreeHeight(self):
757
757
758
758
return maxChildHeight + 1
759
759
760
- def getAllLeaves (self ):
760
+ def getAllLeaves (self ) -> List [ "VDI" ] :
761
761
"Get all leaf nodes in the subtree rooted at self"
762
762
if len (self .children ) == 0 :
763
763
return [self ]
@@ -828,6 +828,57 @@ def _clear(self):
828
828
def _clearRef (self ):
829
829
self ._vdiRef = None
830
830
831
def _call_plug_cancel(self, hostRef):
    """Call the "commit_cancel" function of the on-slave plugin on hostRef.

    The plugin is given this VDI's path and vdi_type; whatever it returns
    is discarded.
    """
    plugin_args = dict(path=self.path, vdi_type=self.vdi_type)
    xenapi_host = self.sr.xapi.session.xenapi.host
    xenapi_host.call_plugin(
        hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", plugin_args)
835
+
836
def _call_plugin_coalesce(self, hostRef):
    """Call the "commit_tapdisk" function of the on-slave plugin on hostRef.

    The plugin is given this VDI's path and vdi_type; whatever it returns
    is discarded.
    """
    plugin_args = dict(path=self.path, vdi_type=self.vdi_type)
    xenapi_host = self.sr.xapi.session.xenapi.host
    xenapi_host.call_plugin(
        hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", plugin_args)
840
+
841
def _doCoalesceOnHost(self, hostRef):
    """Coalesce this VDI onto its parent through the on-slave plugin of hostRef.

    Steps, in order: validate self and the parent, call the parent's
    _increaseSizeVirt with this VDI's sizeVirt, push the resize to the
    slaves, run the plugin coalesce call under Util.runAbortable, then
    validate the parent again and refresh its block info.

    The abort test polls the SR's gc_running marker file for this VDI:
    an empty file triggers a plugin-side cancel and aborts; a file that
    cannot be opened or read also aborts.
    """
    self.validate()
    self.parent.validate(True)
    self.parent._increaseSizeVirt(self.sizeVirt)
    self.sr._updateSlavesOnResize(self.parent)
    #TODO: We might need to make the LV RW on the slave directly for coalesce?
    # Children and parent need to be RW for QCOW2 coalesce, otherwise
    # tapdisk(libqcow) will crash trying to access them

    def should_abort():
        marker = self.sr._gc_running_file(self)
        try:
            with open(marker, "r") as handle:
                if handle.read():
                    # Marker still has content: keep going.
                    return False
                #TODO: Need to call commit cancel on the hostRef if we stop
                self._call_plug_cancel(hostRef)
                return True
        except OSError as err:
            if err.errno != errno.ENOENT:
                util.SMlog("IOError: {}".format(err))
            else:
                util.SMlog("File {} does not exist".format(marker))
            return True

    def run_remote_coalesce():
        return self._call_plugin_coalesce(hostRef)

    Util.runAbortable(run_remote_coalesce, None, self.sr.uuid,
                      should_abort, VDI.POLL_INTERVAL, 0)

    self.parent.validate(True)
    # Content verification (self._verifyContents(0)) is currently disabled.
    self.parent.updateBlockInfo()
871
+
872
def _isOpenOnHosts(self) -> Optional[str]:
    """Return the ref of the first host whose plugin reports openers, else None.

    Every attached PBD of the SR is visited in the order returned by
    getAttachedPBDs(); the on-slave plugin's "is_openers" function is
    asked about this VDI's path on the PBD's host, and the reply is
    interpreted with util.strtobool.
    """
    for pbd in self.sr.xapi.getAttachedPBDs():
        host = pbd["host"]
        plugin_args = {"path": self.path}
        reply = self.sr.xapi.session.xenapi.host.call_plugin(
            host, XAPI.PLUGIN_ON_SLAVE, "is_openers", plugin_args)
        if not util.strtobool(reply):
            continue
        return host
    return None
881
+
831
882
def _doCoalesce (self ) -> None :
832
883
"""Coalesce self onto parent. Only perform the actual coalescing of
833
884
an image, but not the subsequent relinking. We'll do that as the next step,
@@ -914,7 +965,7 @@ def coalesce(self) -> int:
914
965
return self .cowutil .coalesce (self .path )
915
966
916
967
@staticmethod
917
- def _doCoalesceCowImage (vdi ):
968
+ def _doCoalesceCowImage (vdi : "VDI" ):
918
969
try :
919
970
startTime = time .time ()
920
971
allocated_size = vdi .getAllocatedSize ()
@@ -943,7 +994,21 @@ def _vdi_is_raw(self, vdi_path):
943
994
944
995
def _coalesceCowImage (self , timeOut ):
945
996
Util .log (" Running COW coalesce on %s" % self )
946
- abortTest = lambda : IPCFlag (self .sr .uuid ).test (FLAG_TYPE_ABORT )
997
+ def abortTest ():
998
+ if self .cowutil .isCoalesceableOnRemote ():
999
+ file = self .sr ._gc_running_file (self )
1000
+ try :
1001
+ with open (file , "r" ) as f :
1002
+ if not f .read ():
1003
+ return True
1004
+ except OSError as e :
1005
+ if e .errno == errno .ENOENT :
1006
+ util .SMlog ("File {} does not exist" .format (file ))
1007
+ else :
1008
+ util .SMlog ("IOError: {}" .format (e ))
1009
+ return True
1010
+ return IPCFlag (self .sr .uuid ).test (FLAG_TYPE_ABORT )
1011
+
947
1012
try :
948
1013
util .fistpoint .activate_custom_fn (
949
1014
"cleanup_coalesceVHD_inject_failure" ,
@@ -2383,7 +2448,17 @@ def cleanupJournals(self, dryRun=False):
2383
2448
def cleanupCache(self, maxAge=-1) -> int:
    """Do nothing and return 0, whatever maxAge is.

    NOTE(review): presumably a default meant to be overridden by SR types
    that keep a cache - confirm against the subclasses.
    """
    return 0
2385
2450
2386
- def _coalesce (self , vdi ):
2451
def _hasLeavesAttachedOn(self, vdi: VDI):
    """Ask util.get_hosts_attached_on about every leaf below vdi.

    Collects the uuid of each leaf returned by vdi.getAllLeaves() and
    returns the helper's result unchanged.
    NOTE(review): presumably a list of host refs - confirm against
    util.get_hosts_attached_on.
    """
    leaf_uuids = []
    for leaf in vdi.getAllLeaves():
        leaf_uuids.append(leaf.uuid)
    return util.get_hosts_attached_on(self.xapi.session, leaf_uuids)
2455
+
2456
def _gc_running_file(self, vdi):
    """Return the path of the per-VDI "gc_running" marker file for this SR.

    The path is NON_PERSISTENT_DIR/<SR uuid>/gc_running_<VDI uuid>.
    Nothing is created or checked on disk here.
    """
    sr_dir = os.path.join(NON_PERSISTENT_DIR, str(self.uuid))
    return os.path.join(sr_dir, "gc_running_{}".format(vdi.uuid))
2459
+
2460
+ def _coalesce (self , vdi : VDI ):
2461
+ skipRelink = False
2387
2462
if self .journaler .get (vdi .JRN_RELINK , vdi .uuid ):
2388
2463
# this means we had done the actual coalescing already and just
2389
2464
# need to finish relinking and/or refreshing the children
@@ -2393,8 +2468,35 @@ def _coalesce(self, vdi):
2393
2468
# order to decide whether to abort the coalesce. We remove the
2394
2469
# journal as soon as the COW coalesce step is done, because we
2395
2470
# don't expect the rest of the process to take long
2471
+
2472
+ #TODO: Create `gc_running` in `/run/nonpersistent/sm/<sr uuid>/`
2473
+ if os .path .exists (self ._gc_running_file (vdi )):
2474
+ util .SMlog ("gc_running already exist for {}. Ignoring..." .format (self .uuid ))
2475
+
2476
+ with open (self ._gc_running_file (vdi ), "w" ) as f :
2477
+ f .write ("1" )
2478
+
2396
2479
self .journaler .create (vdi .JRN_COALESCE , vdi .uuid , "1" )
2397
- vdi ._doCoalesce ()
2480
# Everything between writing the gc_running marker and the end of the
# coalesce step runs under one try block, so the marker is removed on
# every failure path - including a failing host lookup and the early
# "activated more than once" bail-out below.
try:
    host_refs = self._hasLeavesAttachedOn(vdi)
    #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time
    if len(host_refs) > 1:
        util.SMlog("Not coalesceable, chain activated more than once")
        raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error

    # isCoalesceableOnRemote is a method and must be called: the bare
    # attribute is a bound method, which is always truthy and would send
    # every attached chain down the remote path regardless of image type.
    if host_refs and vdi.cowutil.isCoalesceableOnRemote():
        # Leaf opened on another host, we need to call online coalesce
        vdi._doCoalesceOnHost(host_refs[0])
        skipRelink = True
    else:
        vdi._doCoalesce()
except BaseException:
    # Catch everything so the marker never outlives a failed coalesce;
    # the original error is re-raised unchanged below.
    try:
        os.unlink(self._gc_running_file(vdi))
    except FileNotFoundError:
        # The marker is already gone (the abort tests treat a missing
        # marker as an abort request); don't let the cleanup mask the
        # error that brought us here.
        pass
    raise
2496
+ """
2497
+ vdi._doCoalesce will call vdi._coalesceCowImage (after doing other things).
2498
+ It will then call VDI._doCoalesceCowImage in a runAbortable context
2499
+ """
2398
2500
self .journaler .remove (vdi .JRN_COALESCE , vdi .uuid )
2399
2501
2400
2502
util .fistpoint .activate ("LVHDRT_before_create_relink_journal" , self .uuid )
@@ -2403,19 +2505,22 @@ def _coalesce(self, vdi):
2403
2505
# like SM.clone from manipulating the VDIs we'll be relinking and
2404
2506
# rescan the SR first in case the children changed since the last
2405
2507
# scan
2406
- self .journaler .create (vdi .JRN_RELINK , vdi .uuid , "1" )
2508
+ if not skipRelink :
2509
+ self .journaler .create (vdi .JRN_RELINK , vdi .uuid , "1" )
2407
2510
2408
- self .lock ()
2409
- try :
2410
- vdi .parent ._tagChildrenForRelink ()
2411
- self .scan ()
2412
- vdi ._relinkSkip ()
2413
- finally :
2414
- self .unlock ()
2415
- # Reload the children to leave things consistent
2416
- vdi .parent ._reloadChildren (vdi )
2511
+ if not skipRelink :
2512
+ self .lock ()
2513
+ try :
2514
+ vdi .parent ._tagChildrenForRelink ()
2515
+ self .scan ()
2516
+ vdi ._relinkSkip ()
2517
+ finally :
2518
+ self .unlock ()
2519
+ # Reload the children to leave things consistent
2520
+ vdi .parent ._reloadChildren (vdi )
2521
+ self .journaler .remove (vdi .JRN_RELINK , vdi .uuid )
2417
2522
2418
- self .journaler . remove (vdi . JRN_RELINK , vdi . uuid )
2523
+ os . unlink ( self ._gc_running_file (vdi ) )
2419
2524
self .deleteVDI (vdi )
2420
2525
2421
2526
class CoalesceTracker :
@@ -3718,8 +3823,7 @@ def _gc_init_file(sr_uuid):
3718
3823
3719
3824
def _create_init_file(sr_uuid):
    """Create the SR's directory under NON_PERSISTENT_DIR and write '1'
    to the path returned by _gc_init_file(sr_uuid).

    NOTE(review): assumes _gc_init_file returns a path inside that
    directory - confirm against its definition.
    """
    util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
    # _gc_init_file already returns the full path; wrapping it in a
    # single-argument os.path.join was a no-op.
    with open(_gc_init_file(sr_uuid), 'w+') as f:
        f.write('1')
3724
3828
3725
3829
@@ -3748,7 +3852,7 @@ def abortTest():
3748
3852
Util .log ("GC active, quiet period ended" )
3749
3853
3750
3854
3751
- def _gcLoop (sr , dryRun = False , immediate = False ):
3855
+ def _gcLoop (sr : SR , dryRun = False , immediate = False ):
3752
3856
if not lockGCActive .acquireNoblock ():
3753
3857
Util .log ("Another GC instance already active, exiting" )
3754
3858
return
0 commit comments