Skip to content

feat(linstorvolumemanager): cache controller uri in a file #83

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: 3.2.12-8.3
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion drivers/LinstorSR.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from linstorvolumemanager import write_controller_uri_cache
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anything imported from sm_typing should come before any other imports. This module contains everything important for typing the project.

And more important: you must move this import in the try/catch below, otherwise if LINSTOR python packages are not installed, it can have serious repercussions on the XAPI driver detection.

from sm_typing import Optional, override

from constants import CBTLOG_TAG
Expand Down Expand Up @@ -420,6 +420,7 @@ def connect():
try:
util.SMlog('Connecting from config to LINSTOR controller using: {}'.format(ip))
create_linstor(controller_uri, attempt_count=0)
write_controller_uri_cache(controller_uri)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function might make more sense line 434.
This code is used to test to get a controller called a number of times.
If it succeed, you can just save the controller after.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No that's correct. This code part is only executed when we can't reuse the cached value. If you move it line 434, the write call would be executed multiple times for each on-slave call.

return controller_uri
except:
pass
Expand Down
39 changes: 13 additions & 26 deletions drivers/linstor-manager
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,7 @@ def detach(session, args):
vdi_uuid = args['vdiUuid']
group_name = args['groupName']

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand Down Expand Up @@ -405,8 +404,7 @@ def get_vhd_info(session, args):
group_name = args['groupName']
include_parent = util.strtobool(args['includeParent'])

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand Down Expand Up @@ -440,8 +438,7 @@ def get_parent(session, args):
device_path = args['devicePath']
group_name = args['groupName']

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand Down Expand Up @@ -594,8 +591,7 @@ def deflate(session, args):
zeroize = util.strtobool(args['zeroize'])
group_name = args['groupName']

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand All @@ -619,8 +615,7 @@ def lock_vdi(session, args):
if locked:
lock.acquire()

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand Down Expand Up @@ -678,8 +673,7 @@ def add_host(session, args):
)

# 3. Ensure node doesn't exist.
linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand Down Expand Up @@ -781,8 +775,7 @@ def remove_host(session, args):
pbds[pbd_ref] = pbd

# 2. Remove node.
linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand Down Expand Up @@ -1127,8 +1120,7 @@ def create_node_interface(session, args):

ip_addr = get_ip_addr_of_pif(session, pif_uuid)

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand All @@ -1144,8 +1136,7 @@ def destroy_node_interface(session, args):
hostname = args['hostname']
name = args['name']

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand All @@ -1164,8 +1155,7 @@ def modify_node_interface(session, args):

ip_addr = get_ip_addr_of_pif(session, pif_uuid)

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand All @@ -1180,8 +1170,7 @@ def list_node_interfaces(session, args):
group_name = args['groupName']
hostname = args['hostname']

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand All @@ -1195,8 +1184,7 @@ def get_node_preferred_interface(session, args):
group_name = args['groupName']
hostname = args['hostname']

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand All @@ -1211,8 +1199,7 @@ def set_node_preferred_interface(session, args):
hostname = args['hostname']
name = args['name']

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand Down
7 changes: 5 additions & 2 deletions drivers/linstorjournaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@


from linstorvolumemanager import \
get_controller_uri, LinstorVolumeManager, LinstorVolumeManagerError
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you sort the imports? If the line is too long, don't hesitate to make several.

get_controller_uri, LinstorVolumeManager, LinstorVolumeManagerError, build_controller_uri_cache, \
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only delete_controller_uri_cache appear to be used

delete_controller_uri_cache
import linstor
import re
import util
Expand Down Expand Up @@ -160,8 +161,10 @@ def connect(uri):

try:
return connect(uri)
except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
except LinstorVolumeManagerError:
pass
except linstor.errors.LinstorNetworkError:
delete_controller_uri_cache()

return util.retry(
lambda: connect(None),
Expand Down
92 changes: 83 additions & 9 deletions drivers/linstorvolumemanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@

DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

CONTROLLER_CACHE_DIRECTORY = os.environ.get('TMPDIR', '/tmp') + '/linstor'
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have dedicated places for nonpersistent SR related data in /run/nonpersistent/sm/<SR_UUID>/.
It might make sense to store it there.
@Wescoeur might have something else on his mind

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hesitated to add such a comment. But I see a problem: if we want to support multiple LINSTOR SRs, we cannot use a non-persistent directory with a specific SR UUID.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having it in /tmp/ also allows any other scripts to use it.
For example, a command above linstor using it to fill --controllers= could use it.

CONTROLLER_CACHE_FILE = 'controller_uri'
CONTROLLER_CACHE_PATH = "{}/{}".format(CONTROLLER_CACHE_DIRECTORY, CONTROLLER_CACHE_FILE)

PLUGIN = 'linstor-manager'


Expand Down Expand Up @@ -196,18 +200,57 @@ def _get_controller_uri():
# Not found, maybe we are trying to create the SR...
pass

def get_controller_uri():
retries = 0
while True:

def get_cached_controller_uri():
try:
with open(CONTROLLER_CACHE_PATH, "r") as f:
return f.read().strip()
except Exception as e:
util.SMlog('Unable to read controller uri cache file at {} : {}'.format(
CONTROLLER_CACHE_PATH,
e
))
Comment on lines +209 to +212
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
util.SMlog('Unable to read controller uri cache file at {} : {}'.format(
CONTROLLER_CACHE_PATH,
e
))
util.SMlog('Unable to read controller uri cache file `{}`: {}'.format(
CONTROLLER_CACHE_PATH,
e
))

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also: I'm not sure that's a good idea to log if the file is missing. Maybe it's better to log in case of a real error like a disk failure.

return None

def delete_controller_uri_cache():
try:
os.remove(CONTROLLER_CACHE_PATH)
except Exception as e:
util.SMlog('Unable to write controller uri cache file at {} : {}'.format(
CONTROLLER_CACHE_PATH,
e
))
Comment on lines +219 to +222
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
util.SMlog('Unable to write controller uri cache file at {} : {}'.format(
CONTROLLER_CACHE_PATH,
e
))
util.SMlog('Unable to delete controller uri cache file `{}`: {}'.format(
CONTROLLER_CACHE_PATH,
e
))

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same, don't see a reason to log if the file is not here.


def write_controller_uri_cache(uri):
try:
if not os.path.exists(CONTROLLER_CACHE_DIRECTORY):
os.makedirs(CONTROLLER_CACHE_DIRECTORY)
os.chmod(CONTROLLER_CACHE_DIRECTORY, 0o700)
if not os.path.isdir(CONTROLLER_CACHE_DIRECTORY):
raise NotADirectoryError
Comment on lines +229 to +230
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that's important here. You will have an error raised by the next open call if we don't manipulate a file.

with open(CONTROLLER_CACHE_PATH, "w") as f:
f.write(uri)
except Exception as e:
util.SMlog('Unable to write controller uri cache file at {} : {}'.format(
CONTROLLER_CACHE_PATH,
e
))
Comment on lines +234 to +237
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
util.SMlog('Unable to write controller uri cache file at {} : {}'.format(
CONTROLLER_CACHE_PATH,
e
))
util.SMlog('Unable to write controller uri cache file `{}`: {}'.format(
CONTROLLER_CACHE_PATH,
e
))



def build_controller_uri_cache():
for _ in range(10):
uri = _get_controller_uri()
if uri:
write_controller_uri_cache(uri)
return uri

retries += 1
if retries >= 10:
break
Comment on lines -206 to -208
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand the range() call here. But in the worst case you can wait 1s for nothing when the last range value is reached.

time.sleep(1)
return None

def get_controller_uri():
uri = get_cached_controller_uri()
if not uri:
uri = build_controller_uri_cache()
return uri

def get_controller_node_name():
PLUGIN_CMD = 'hasControllerRunning'
Expand Down Expand Up @@ -429,6 +472,34 @@ def __init__(
self._volume_info_cache_dirty = True
self._build_volumes(repair=repair)

@staticmethod
def create_from_cache(
group_name, repair=False, logger=default_logger.__func__,
attempt_count=30
):
"""
Attempt to create a LinstorVolumeManager from cached data.
If it fails, refresh the cache and retry once.

:param str group_name: The SR goup name to use.
:param bool repair: If true we try to remove bad volumes due to a crash
or unexpected behavior.
:param function logger: Function to log messages.
:param int attempt_count: Number of attempts to join the controller.
"""
uri = get_cached_controller_uri()
if not uri:
uri = build_controller_uri_cache()
if not uri:
raise LinstorVolumeManagerError(
"Unable to retrieve a valid controller URI from cache or after rebuild."
)

return LinstorVolumeManager(
uri, group_name, repair=repair,
logger=logger, attempt_count=attempt_count
)

@property
def group_name(self):
"""
Expand Down Expand Up @@ -2598,7 +2669,7 @@ def _get_error_str(cls, result):

@classmethod
def _create_linstor_instance(
cls, uri, keep_uri_unmodified=False, attempt_count=30
cls, uri, keep_uri_unmodified=False, keep_cache_on_error=False, attempt_count=30
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you can remove this new keep_cache_on_error param if not used. Don't see a good use case for now.

):
retry = False

Expand All @@ -2615,8 +2686,11 @@ def connect(uri):

try:
return connect(uri)
except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
except LinstorVolumeManagerError:
pass
except linstor.errors.LinstorNetworkError:
if not keep_cache_on_error:
build_controller_uri_cache()

if not keep_uri_unmodified:
uri = None
Expand Down
5 changes: 2 additions & 3 deletions drivers/tapdisk-pause
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import lvmcache

try:
from linstorvhdutil import LinstorVhdUtil
from linstorvolumemanager import get_controller_uri, LinstorVolumeManager
from linstorvolumemanager import LinstorVolumeManager
LINSTOR_AVAILABLE = True
except ImportError:
LINSTOR_AVAILABLE = False
Expand Down Expand Up @@ -163,8 +163,7 @@ class Tapdisk:
dconf = session.xenapi.PBD.get_device_config(pbd)
group_name = dconf['group-name']

linstor = LinstorVolumeManager(
get_controller_uri(),
linstor = LinstorVolumeManager.create_from_cache(
group_name,
logger=util.SMlog
)
Expand Down