Skip to content

Commit

Permalink
[ctrmgr]: Container image clean up bug fix (sonic-net#15772) (sonic-n…
Browse files Browse the repository at this point in the history
  • Loading branch information
mssonicbld authored Jul 19, 2023
1 parent 74598e5 commit 38e721b
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 25 deletions.
8 changes: 6 additions & 2 deletions src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ def on_state_update(self, key, op, data):

def do_tag_latest(self, feat, docker_id, image_ver):
ret = kube_commands.tag_latest(feat, docker_id, image_ver)
if ret != 0:
if ret == 1:
# Tag latest failed. Retry after an interval
self.start_time = datetime.datetime.now()
self.start_time += datetime.timedelta(
Expand All @@ -590,7 +590,7 @@ def do_tag_latest(self, feat, docker_id, image_ver):

log_debug("Tag latest as local failed retry after {} seconds @{}".
format(remote_ctr_config[TAG_RETRY], self.start_time))
else:
elif ret == 0:
last_version = self.st_data[feat][ST_FEAT_CTR_STABLE_VER]
if last_version == image_ver:
last_version = self.st_data[feat][ST_FEAT_CTR_LAST_VER]
Expand All @@ -600,6 +600,10 @@ def do_tag_latest(self, feat, docker_id, image_ver):
self.st_data[ST_FEAT_CTR_LAST_VER] = last_version
self.st_data[ST_FEAT_CTR_STABLE_VER] = image_ver
self.do_clean_image(feat, image_ver, last_version)
elif ret == -1:
# This means the container we want to tag latest is not running
# so we don't need to do clean up
pass

def do_clean_image(self, feat, current_version, last_version):
ret = kube_commands.clean_image(feat, current_version, last_version)
Expand Down
49 changes: 28 additions & 21 deletions src/sonic-ctrmgrd/ctrmgr/kube_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def tag_latest(feat, docker_id, image_ver):
else:
log_error(err)
elif ret == -1:
ret = 0
log_debug(out)
else:
log_error(err)
return ret
Expand All @@ -487,31 +487,38 @@ def _do_clean(feat, current_version, last_version):
err = ""
out = ""
ret = 0
DOCKER_ID = "docker_id"
IMAGE_ID = "image_id"
REPO = "repo"
_, image_info, err = _run_command("docker images |grep {} |grep -v latest |awk '{{print $1,$2,$3}}'".format(feat))
if image_info:
version_dict = {}
version_dict_default = {}
remote_image_version_dict = {}
local_image_version_dict = {}
for info in image_info.split("\n"):
rep, version, docker_id = info.split()
rep, version, image_id = info.split()
if len(rep.split("/")) == 1:
version_dict_default[version] = {DOCKER_ID: docker_id, REPO: rep}
local_image_version_dict[version] = {IMAGE_ID: image_id, REPO: rep}
else:
version_dict[version] = {DOCKER_ID: docker_id, REPO: rep}
remote_image_version_dict[version] = {IMAGE_ID: image_id, REPO: rep}

if current_version in version_dict:
image_prefix = version_dict[current_version][REPO]
del version_dict[current_version]
if current_version in remote_image_version_dict:
image_prefix = remote_image_version_dict[current_version][REPO]
del remote_image_version_dict[current_version]
else:
out = "Current version {} doesn't exist.".format(current_version)
ret = 0
return ret, out, err
# should be only one item in version_dict_default
for k, v in version_dict_default.items():
local_version, local_repo, local_docker_id = k, v[REPO], v[DOCKER_ID]
tag_res, _, err = _run_command("docker tag {} {}:{} && docker rmi {}:{}".format(
local_docker_id, image_prefix, local_version, local_repo, local_version))
# should be only one item in local_image_version_dict
for k, v in local_image_version_dict.items():
local_version, local_repo, local_image_id = k, v[REPO], v[IMAGE_ID]
# if there is a kube image with same version, need to remove the kube version
# and tag the local version to kube version for fallback preparation
# and remove the local version
if local_version in remote_image_version_dict:
tag_res, _, err = _run_command("docker rmi {}:{} && docker tag {} {}:{} && docker rmi {}:{}".format(
image_prefix, local_version, local_image_id, image_prefix, local_version, local_repo, local_version))
# if there is no kube image with same version, just remove the local version
else:
tag_res, _, err = _run_command("docker rmi {}:{}".format(local_repo, local_version))
if tag_res == 0:
msg = "Tag {} local version images successfully".format(feat)
log_debug(msg)
Expand All @@ -520,12 +527,12 @@ def _do_clean(feat, current_version, last_version):
err = "Failed to tag {} local version images. Err: {}".format(feat, err)
return ret, out, err

if last_version in version_dict:
del version_dict[last_version]
if last_version in remote_image_version_dict:
del remote_image_version_dict[last_version]

versions = [item[DOCKER_ID] for item in version_dict.values()]
if versions:
clean_res, _, err = _run_command("docker rmi {} --force".format(" ".join(versions)))
image_id_remove_list = [item[IMAGE_ID] for item in remote_image_version_dict.values()]
if image_id_remove_list:
clean_res, _, err = _run_command("docker rmi {} --force".format(" ".join(image_id_remove_list)))
else:
clean_res = 0
if clean_res == 0:
Expand All @@ -534,7 +541,7 @@ def _do_clean(feat, current_version, last_version):
err = "Failed to clean {} old version images. Err: {}".format(feat, err)
ret = 1
else:
err = "Failed to docker images |grep {} |awk '{{print $3}}'".format(feat)
err = "Failed to docker images |grep {} |awk '{{print $3}}'. Error: {}".format(feat, err)
ret = 1

return ret, out, err
Expand Down
23 changes: 21 additions & 2 deletions src/sonic-ctrmgrd/tests/kube_commands_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@
},
2: {
common_test.DESCR: "Tag a unstable container",
common_test.RETVAL: 0,
common_test.RETVAL: -1,
common_test.ARGS: ["snmp", "123456", "v1"],
common_test.PROC_CMD: [
"docker ps |grep 123456"
Expand Down Expand Up @@ -382,7 +382,7 @@
common_test.ARGS: ["snmp", "20201231.84", ""],
common_test.PROC_CMD: [
"docker images |grep snmp |grep -v latest |awk '{print $1,$2,$3}'",
"docker tag 507f8d28bf6e sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker rmi docker-sonic-telemetry:20201231.74"
"docker rmi docker-sonic-telemetry:20201231.74"
],
common_test.PROC_OUT: [
"docker-sonic-telemetry 20201231.74 507f8d28bf6e\n\
Expand All @@ -394,6 +394,25 @@
0
]
},
5: {
common_test.DESCR: "Clean image successfuly(local to dry-kube to kube)",
common_test.RETVAL: 0,
common_test.ARGS: ["snmp", "20201231.84", "20201231.74"],
common_test.PROC_CMD: [
"docker images |grep snmp |grep -v latest |awk '{print $1,$2,$3}'",
"docker rmi sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker tag 507f8d28bf6e sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker rmi docker-sonic-telemetry:20201231.74"
],
common_test.PROC_OUT: [
"docker-sonic-telemetry 20201231.74 507f8d28bf6e\n\
sonick8scue.azurecr.io/docker-sonic-telemetry 20201231.74 507f8d28bf6f\n\
sonick8scue.azurecr.io/docker-sonic-telemetry 20201231.84 507f8d28bf6g",
""
],
common_test.PROC_CODE: [
0,
0
]
},
}

class TestKubeCommands(object):
Expand Down

0 comments on commit 38e721b

Please sign in to comment.