Skip to content

Commit 1bf2a61

Browse files
authored
[ctrmgr]: Container image clean up bug fix (#15772)
Why I did it When do clean up container images, current code has two bugs need to be fixed. And some variables' name maybe cause confused, change the variables' name. Work item tracking Microsoft ADO (number only): 24502294 How I did it We do clean up after tag latest successfully. But currently tag latest function only return 0 and 1, 0 means succeed and 1 means failed, when we get 1, we will retry, when we get 0, we will do clean up. Actually the code 0 includes another case we don't need to do clean up. The case is that when we are doing tag latest, the container image we want to tag maybe not running, so we can not tag latest and don't need to cleanup, we need to separate this case from 0, return -1 now. When local mode(v1) -> kube mode(v2) happens, one problem is how to handle the local image, there are two cases. one case is that there was one kube v1 container dry-run(cause we don't relace the local if kube version = local version), we will remove the kube v1 image and tag the local version with ACR prefix and remove local v1 local tag. Another case is that there was no kube v1 container dry-run, we remove the local v1 image directly, cause the local v1 image should not be the last desire version. About the docker_id variable, it may cause confused, it's actually docker image id, so rename the variable. About the two dicts and the list, rename them to be more readable. How to verify it Check tag latest and image clean up result.
1 parent df13380 commit 1bf2a61

File tree

3 files changed

+55
-25
lines changed

3 files changed

+55
-25
lines changed

src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,7 @@ def on_state_update(self, key, op, data):
590590

591591
def do_tag_latest(self, feat, docker_id, image_ver):
592592
ret = kube_commands.tag_latest(feat, docker_id, image_ver)
593-
if ret != 0:
593+
if ret == 1:
594594
# Tag latest failed. Retry after an interval
595595
self.start_time = datetime.datetime.now()
596596
self.start_time += datetime.timedelta(
@@ -599,7 +599,7 @@ def do_tag_latest(self, feat, docker_id, image_ver):
599599

600600
log_debug("Tag latest as local failed retry after {} seconds @{}".
601601
format(remote_ctr_config[TAG_RETRY], self.start_time))
602-
else:
602+
elif ret == 0:
603603
last_version = self.st_data[feat][ST_FEAT_CTR_STABLE_VER]
604604
if last_version == image_ver:
605605
last_version = self.st_data[feat][ST_FEAT_CTR_LAST_VER]
@@ -609,6 +609,10 @@ def do_tag_latest(self, feat, docker_id, image_ver):
609609
self.st_data[ST_FEAT_CTR_LAST_VER] = last_version
610610
self.st_data[ST_FEAT_CTR_STABLE_VER] = image_ver
611611
self.do_clean_image(feat, image_ver, last_version)
612+
elif ret == -1:
613+
# This means the container we want to tag latest is not running
614+
# so we don't need to do clean up
615+
pass
612616

613617
def do_clean_image(self, feat, current_version, last_version):
614618
ret = kube_commands.clean_image(feat, current_version, last_version)

src/sonic-ctrmgrd/ctrmgr/kube_commands.py

+28-21
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ def tag_latest(feat, docker_id, image_ver):
478478
else:
479479
log_error(err)
480480
elif ret == -1:
481-
ret = 0
481+
log_debug(out)
482482
else:
483483
log_error(err)
484484
return ret
@@ -487,31 +487,38 @@ def _do_clean(feat, current_version, last_version):
487487
err = ""
488488
out = ""
489489
ret = 0
490-
DOCKER_ID = "docker_id"
490+
IMAGE_ID = "image_id"
491491
REPO = "repo"
492492
_, image_info, err = _run_command("docker images |grep {} |grep -v latest |awk '{{print $1,$2,$3}}'".format(feat))
493493
if image_info:
494-
version_dict = {}
495-
version_dict_default = {}
494+
remote_image_version_dict = {}
495+
local_image_version_dict = {}
496496
for info in image_info.split("\n"):
497-
rep, version, docker_id = info.split()
497+
rep, version, image_id = info.split()
498498
if len(rep.split("/")) == 1:
499-
version_dict_default[version] = {DOCKER_ID: docker_id, REPO: rep}
499+
local_image_version_dict[version] = {IMAGE_ID: image_id, REPO: rep}
500500
else:
501-
version_dict[version] = {DOCKER_ID: docker_id, REPO: rep}
501+
remote_image_version_dict[version] = {IMAGE_ID: image_id, REPO: rep}
502502

503-
if current_version in version_dict:
504-
image_prefix = version_dict[current_version][REPO]
505-
del version_dict[current_version]
503+
if current_version in remote_image_version_dict:
504+
image_prefix = remote_image_version_dict[current_version][REPO]
505+
del remote_image_version_dict[current_version]
506506
else:
507507
out = "Current version {} doesn't exist.".format(current_version)
508508
ret = 0
509509
return ret, out, err
510-
# should be only one item in version_dict_default
511-
for k, v in version_dict_default.items():
512-
local_version, local_repo, local_docker_id = k, v[REPO], v[DOCKER_ID]
513-
tag_res, _, err = _run_command("docker tag {} {}:{} && docker rmi {}:{}".format(
514-
local_docker_id, image_prefix, local_version, local_repo, local_version))
510+
# should be only one item in local_image_version_dict
511+
for k, v in local_image_version_dict.items():
512+
local_version, local_repo, local_image_id = k, v[REPO], v[IMAGE_ID]
513+
# if there is a kube image with same version, need to remove the kube version
514+
# and tag the local version to kube version for fallback preparation
515+
# and remove the local version
516+
if local_version in remote_image_version_dict:
517+
tag_res, _, err = _run_command("docker rmi {}:{} && docker tag {} {}:{} && docker rmi {}:{}".format(
518+
image_prefix, local_version, local_image_id, image_prefix, local_version, local_repo, local_version))
519+
# if there is no kube image with same version, just remove the local version
520+
else:
521+
tag_res, _, err = _run_command("docker rmi {}:{}".format(local_repo, local_version))
515522
if tag_res == 0:
516523
msg = "Tag {} local version images successfully".format(feat)
517524
log_debug(msg)
@@ -520,12 +527,12 @@ def _do_clean(feat, current_version, last_version):
520527
err = "Failed to tag {} local version images. Err: {}".format(feat, err)
521528
return ret, out, err
522529

523-
if last_version in version_dict:
524-
del version_dict[last_version]
530+
if last_version in remote_image_version_dict:
531+
del remote_image_version_dict[last_version]
525532

526-
versions = [item[DOCKER_ID] for item in version_dict.values()]
527-
if versions:
528-
clean_res, _, err = _run_command("docker rmi {} --force".format(" ".join(versions)))
533+
image_id_remove_list = [item[IMAGE_ID] for item in remote_image_version_dict.values()]
534+
if image_id_remove_list:
535+
clean_res, _, err = _run_command("docker rmi {} --force".format(" ".join(image_id_remove_list)))
529536
else:
530537
clean_res = 0
531538
if clean_res == 0:
@@ -534,7 +541,7 @@ def _do_clean(feat, current_version, last_version):
534541
err = "Failed to clean {} old version images. Err: {}".format(feat, err)
535542
ret = 1
536543
else:
537-
err = "Failed to docker images |grep {} |awk '{{print $3}}'".format(feat)
544+
err = "Failed to docker images |grep {} |awk '{{print $3}}'. Error: {}".format(feat, err)
538545
ret = 1
539546

540547
return ret, out, err

src/sonic-ctrmgrd/tests/kube_commands_test.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@
266266
},
267267
2: {
268268
common_test.DESCR: "Tag a unstable container",
269-
common_test.RETVAL: 0,
269+
common_test.RETVAL: -1,
270270
common_test.ARGS: ["snmp", "123456", "v1"],
271271
common_test.PROC_CMD: [
272272
"docker ps |grep 123456"
@@ -382,7 +382,7 @@
382382
common_test.ARGS: ["snmp", "20201231.84", ""],
383383
common_test.PROC_CMD: [
384384
"docker images |grep snmp |grep -v latest |awk '{print $1,$2,$3}'",
385-
"docker tag 507f8d28bf6e sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker rmi docker-sonic-telemetry:20201231.74"
385+
"docker rmi docker-sonic-telemetry:20201231.74"
386386
],
387387
common_test.PROC_OUT: [
388388
"docker-sonic-telemetry 20201231.74 507f8d28bf6e\n\
@@ -394,6 +394,25 @@
394394
0
395395
]
396396
},
397+
5: {
398+
common_test.DESCR: "Clean image successfuly(local to dry-kube to kube)",
399+
common_test.RETVAL: 0,
400+
common_test.ARGS: ["snmp", "20201231.84", "20201231.74"],
401+
common_test.PROC_CMD: [
402+
"docker images |grep snmp |grep -v latest |awk '{print $1,$2,$3}'",
403+
"docker rmi sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker tag 507f8d28bf6e sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker rmi docker-sonic-telemetry:20201231.74"
404+
],
405+
common_test.PROC_OUT: [
406+
"docker-sonic-telemetry 20201231.74 507f8d28bf6e\n\
407+
sonick8scue.azurecr.io/docker-sonic-telemetry 20201231.74 507f8d28bf6f\n\
408+
sonick8scue.azurecr.io/docker-sonic-telemetry 20201231.84 507f8d28bf6g",
409+
""
410+
],
411+
common_test.PROC_CODE: [
412+
0,
413+
0
414+
]
415+
},
397416
}
398417

399418
class TestKubeCommands(object):

0 commit comments

Comments
 (0)