Skip to content

Commit 6900a74

Browse files
committed
WIP for cancelling coalesce for VDI activation
Signed-off-by: Damien Thenot <[email protected]>
1 parent d61737c commit 6900a74

File tree

3 files changed

+116
-13
lines changed

3 files changed

+116
-13
lines changed

drivers/blktap2.py

Lines changed: 87 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
# blktap2: blktap/tapdisk management layer
1919
#
2020

21-
from sm_typing import Any, Callable, ClassVar, Dict, override
21+
from sm_typing import Any, Callable, ClassVar, Dict, override, List
2222

2323
from abc import abstractmethod
2424

@@ -1657,6 +1657,89 @@ def activate(self, sr_uuid, vdi_uuid, writable, caching_params):
16571657
time.sleep(1)
16581658
raise util.SMException("VDI %s locked" % vdi_uuid)
16591659

1660+
def _get_host_ref(self) -> str:
    """
    Return the ref of the host in charge of Garbage Collection for the SR.

    For a shared SR this is the master, looked up through the current
    session; for a local SR it is the host ref stored on the SR object.
    """
    storage = self.target.vdi.sr
    if not storage.is_shared():
        # Local SR: the SR object carries the ref of its own host.
        return storage.host_ref
    return util.get_master_ref(self._session)
1671+
1672+
def _get_chain(self, cowutil, extractUuid) -> List[str]:
    """
    Build the list of VDI UUIDs from the oldest ancestor down to this VDI.

    The walk starts at the target's own path and follows
    `cowutil.getParentNoCheck` until it returns a falsy value; each path is
    turned into a UUID with `extractUuid`.
    """
    #TODO: Need to add error handling for getParentNoCheck, e.g. corrupted VDI where we can't read parent
    uuids = []
    current = self.target.get_vdi_path()
    while True:
        uuids.append(extractUuid(current))
        current = cowutil.getParentNoCheck(current)
        if not current:
            break
    # Collected leaf-first; callers get the chain root-first.
    return uuids[::-1]
1684+
1685+
def _check_journal_coalesce_chain(self, sr_uuid: str, vdi_uuid: str) -> bool:
    """
    Ask the GC host to cancel every journaled coalesce touching this VDI's chain.

    The chain of the target VDI is read, the "coalesce" journal entries of
    the SR are listed, and for each journaled VDI that belongs to the chain
    the `cancel_coalesce_master` plugin is called on the host returned by
    `_get_host_ref`.

    Args:
        sr_uuid: UUID of the SR holding the VDI; used to name the volume
            group for LVM paths and passed to the plugin.
        vdi_uuid: UUID of the VDI being activated; only used for logging.

    Returns:
        True on every path that does not raise. Errors from the helpers or
        from the plugin call are not caught here.
    """
    vdi_type = self.target.get_vdi_type()
    cowutil = getCowUtil(vdi_type)
    # We only need to stop the coalesce in case of QCOW2
    if not cowutil.isCoalesceableOnRemote():
        return True

    path = self.target.get_vdi_path()

    # Different extractUuid & journaler implementations for LVMSR and FileSR
    #TODO: How to identify SR type easily, we could ask XAPI since we have the sruuid (and even ref)
    if path.startswith("/dev/"):
        from lvmcowutil import LvmCowUtil
        import lvmcache
        # Aliased so the module is not shadowed by the journal instance.
        import journaler as lvm_journaler
        vg_name = "VG_XenStorage-{}".format(sr_uuid)
        journal = lvm_journaler.Journaler(lvmcache.LVMCache(vg_name))
        extract_uuid = LvmCowUtil.extractUuid
    else:
        from FileSR import FileVDI
        import fjournaler
        # NOTE(review): relies on the current working directory being the
        # directory that holds the journal files -- confirm against callers.
        journal = fjournaler.Journaler(os.getcwd())
        extract_uuid = FileVDI.extractUuid

    # Get the VDI chain
    vdi_chain = self._get_chain(cowutil, extract_uuid)

    if len(vdi_chain) == 1:
        # We only have a leaf, do nothing
        util.SMlog("VDI {} is only a leaf, continuing...".format(vdi_uuid))
        return True

    # Log the chain of active VDI, one more space of indent per level
    util.SMlog("VDI chain:")
    for level, vdi in enumerate(vdi_chain):
        util.SMlog("{}{}".format(" " * level, vdi))

    chain_uuids = set(vdi_chain)
    vdi_to_cancel = []
    for entry in journal.getAll("coalesce"):
        if entry in chain_uuids:
            vdi_to_cancel.append(entry)
            util.SMlog("Coalescing VDI {} in chain".format(entry))

    if not vdi_to_cancel:
        # Nothing is coalescing on this chain: skip resolving the GC host,
        # which would otherwise cost a lookup on every activation.
        return True

    # Get the host_ref from the host doing the GC work
    host_ref = self._get_host_ref()
    for vdi in vdi_to_cancel:
        args = {"sr_uuid": sr_uuid, "vdi_uuid": vdi}
        util.SMlog("Calling cancel_coalesce_master with args: {}".format(args))
        self._session.xenapi.host.call_plugin(
            host_ref, PLUGIN_ON_SLAVE, "cancel_coalesce_master", args)

    return True
1742+
16601743
@locking("VDIUnavailable")
16611744
def _activate_locked(self, sr_uuid, vdi_uuid, options):
16621745
"""Wraps target.activate and adds a tapdisk"""
@@ -1666,8 +1749,6 @@ def _activate_locked(self, sr_uuid, vdi_uuid, options):
16661749
if self.tap_wanted():
16671750
if not self._add_tag(vdi_uuid, not options["rdonly"]):
16681751
return False
1669-
#TODO: Need to interrupt coalesce on master, the coalesce will check for host_OpaqueRef on the VDI before trying offline coalesce
1670-
#TODO: The coalesce could happen on another slave in onlinecoalesce, interrupt coalesce on another slave (online coalesce)?
16711752
refresh = True
16721753

16731754
try:
@@ -1692,6 +1773,9 @@ def _activate_locked(self, sr_uuid, vdi_uuid, options):
16921773

16931774
vdi_type = self.target.get_vdi_type()
16941775

1776+
self._check_journal_coalesce_chain(sr_uuid, vdi_uuid)
1777+
#TODO: handling error here
1778+
16951779
# Take lvchange-p Lock before running
16961780
# tap-ctl open
16971781
# Needed to avoid race with lvchange -p which is

drivers/cleanup.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -835,8 +835,10 @@ def _call_plug_cancel(self, hostRef):
835835

836836
def _call_plugin_coalesce(self, hostRef):
    """
    Run the `commit_tapdisk` plugin on `hostRef` for this VDI.

    The plugin receives this VDI's path and type; the call and its return
    value are both written to the SM log. Nothing is returned.
    """
    plugin_args = {"path": self.path, "vdi_type": self.vdi_type}
    util.SMlog("DAMS: Calling remote coalesce with: {}".format(plugin_args))
    call_plugin = self.sr.xapi.session.xenapi.host.call_plugin
    result = call_plugin(
        hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", plugin_args)
    util.SMlog("DAMS: Remote coalesce returned {}".format(result))
840842

841843
def _doCoalesceOnHost(self, hostRef):
842844
self.validate()
@@ -852,6 +854,7 @@ def abortTest():
852854
with open(file, "r") as f:
853855
if not f.read():
854856
#TODO: Need to call commit cancel on the hostRef if we stop
857+
util.SMlog("DAMS: Cancelling")
855858
self._call_plug_cancel(hostRef)
856859
return True
857860
except OSError as e:
@@ -862,6 +865,7 @@ def abortTest():
862865
return True
863866
return False
864867

868+
#TODO: Add exception handling here like when calling in a runAbortable situation, e.g. _doCoalesceCOWImage
865869
Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef),
866870
None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
867871

@@ -2476,7 +2480,6 @@ def _coalesce(self, vdi: VDI):
24762480
# journal as soon as the COW coalesce step is done, because we
24772481
# don't expect the rest of the process to take long
24782482

2479-
#TODO: Create `gc_running` in `/run/nonpersistent/sm/<sr uuid>/`
24802483
if os.path.exists(self._gc_running_file(vdi)):
24812484
util.SMlog("gc_running already exist for {}. Ignoring...".format(self.uuid))
24822485

@@ -2492,14 +2495,14 @@ def _coalesce(self, vdi: VDI):
24922495
try:
24932496
if host_refs and vdi.cowutil.isCoalesceableOnRemote:
24942497
#Leaf opened on another host, we need to call online coalesce
2495-
util.SMlog("DAMS: Remote coalesce for {}".format(vdi.path))
2498+
util.SMlog("Remote coalesce for {}".format(vdi.path))
24962499
vdi._doCoalesceOnHost(list(host_refs)[0])
24972500
skipRelink = True
24982501
else:
2499-
util.SMlog("DAMS: Offline coalesce for {}".format(vdi.path))
2502+
util.SMlog("Offline coalesce for {}".format(vdi.path))
25002503
vdi._doCoalesce()
25012504
except Exception as e:
2502-
util.SMlog("DAMS: EXCEPTION {}".format(e))
2505+
util.SMlog("EXCEPTION while coalescing: {}".format(e))
25032506
self._delete_running_file(vdi)
25042507
raise
25052508
"""
@@ -2517,7 +2520,7 @@ def _coalesce(self, vdi: VDI):
25172520
if not skipRelink:
25182521
self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")
25192522

2520-
if not skipRelink:
2523+
if not skipRelink: #TODO: we might want to let relink happen for VDI not currently in use
25212524
self.lock()
25222525
try:
25232526
vdi.parent._tagChildrenForRelink()
@@ -2752,7 +2755,7 @@ def _snapshotCoalesce(self, vdi):
27522755
return False
27532756
return True
27542757

2755-
def _liveLeafCoalesce(self, vdi) -> bool:
2758+
def _liveLeafCoalesce(self, vdi: VDI) -> bool:
27562759
util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
27572760
self.lock()
27582761
try:

drivers/on_slave.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,15 +163,29 @@ def refresh_lun_size_by_SCSIid(session, args):
163163
return "False"
164164

165165
def commit_tapdisk(session, args):
    """
    Plugin entry point: make the volumes writable, then coalesce `path` online.

    Args:
        session: XAPI session handed to the plugin; not used here.
        args: plugin arguments; must contain "path" (path of the VDI to
            coalesce) and "vdi_type" (used to select the COW utility).

    Returns:
        The result of `cowutil.coalesceOnline(path)` converted to a string.

    Raises:
        Any exception raised while reading the parent or coalescing is
        logged with `util.logException` and re-raised to the caller.
    """
    path: str = args["path"]
    vdi_type = args["vdi_type"]
    #TODO: naming should reflect that it does more than coalescing, like setting volume RW

    def set_RW(path):
        # Best effort: a failing lvchange must not abort the coalesce, but
        # it is logged so a later write failure can be traced back to it.
        try:
            util.pread2(["lvchange", "-p", "rw", path])
        except Exception as e:
            util.SMlog("Failed to set {} read-write: {}".format(path, e))

    #TODO: need to make children RW. Or we let the relink happen with a refresh on master and hope it doesn't corrupt the disk
    if path.startswith("/dev/"):
        set_RW(path)

    from cowutil import getCowUtil
    cowutil = getCowUtil(vdi_type)
    try:
        parent = cowutil.getParentNoCheck(path)
        # Guard against a missing parent (presumably reported as a falsy
        # value -- TODO confirm) instead of failing on `.startswith`.
        if parent and parent.startswith("/dev/"):
            set_RW(parent)
        return str(cowutil.coalesceOnline(path))
    except Exception:
        util.logException("Couldn't coalesce online")
        raise
175189

176190
def commit_cancel(session, args):
177191
path = args["path"]
@@ -205,6 +219,7 @@ def cancel_coalesce_master(session, args):
205219

206220
# return "True"
207221

222+
util.SMlog("Running cancel_coalesce_master plugin: {}".format(vdi_uuid))
208223
path = "/run/nonpersistent/sm/{}/gc_running_{}".format(sr_uuid, vdi_uuid)
209224

210225
try:
@@ -236,4 +251,5 @@ def is_openers(session, args):
236251
"is_openers": is_openers,
237252
"commit_tapdisk": commit_tapdisk,
238253
"commit_cancel": commit_cancel,
254+
"cancel_coalesce_master": cancel_coalesce_master,
239255
})

0 commit comments

Comments
 (0)