From f4a7e22e4ed935b8385aa6701d3959409d8d248d Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Wed, 19 Jul 2023 20:04:23 +0800 Subject: [PATCH] [k8s]: Bypass the systemd service restart limit and do immediately restart when change to local mode (#15432) (#15868) --- src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py | 9 +++++++ src/sonic-ctrmgrd/tests/ctrmgrd_test.py | 31 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py b/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py index ea2db99f0258..7e85c22f60c8 100755 --- a/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py +++ b/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py @@ -151,6 +151,7 @@ def is_systemd_active(feat): def restart_systemd_service(server, feat, owner): log_debug("Restart service {} to owner:{}".format(feat, owner)) if not UNIT_TESTING: + subprocess.call(["systemctl", "reset-failed", str(feat)]) status = subprocess.call(["systemctl", "restart", str(feat)]) else: server.mod_db_entry(STATE_DB_NAME, @@ -551,6 +552,7 @@ def on_state_update(self, key, op, data): self.st_data[key] = _update_entry(dflt_st_feat, data) remote_state = self.st_data[key][ST_FEAT_REMOTE_STATE] + current_owner = self.st_data[key][ST_FEAT_OWNER] if (remote_state == REMOTE_RUNNING) and (old_remote_state != remote_state): # Tag latest @@ -563,6 +565,13 @@ def on_state_update(self, key, op, data): log_debug("try to tag latest label after {} seconds @{}".format( remote_ctr_config[TAG_IMAGE_LATEST], start_time)) + + # This is for going back to local without waiting the systemd restart time + # when k8s is down, can't deploy containers to worker and need to go back to local + # if current owner is already local, we don't do restart + if (current_owner != OWNER_LOCAL) and (remote_state == REMOTE_NONE) and (old_remote_state == REMOTE_STOPPED): + restart_systemd_service(self.server, key, OWNER_LOCAL) + return if (not init): if (old_remote_state == remote_state): diff --git a/src/sonic-ctrmgrd/tests/ctrmgrd_test.py b/src/sonic-ctrmgrd/tests/ctrmgrd_test.py index 0304985224ea..76651309ce6a 100755 --- a/src/sonic-ctrmgrd/tests/ctrmgrd_test.py +++ b/src/sonic-ctrmgrd/tests/ctrmgrd_test.py @@ -324,6 +324,37 @@ } } } + }, + 4: { + common_test.DESCR: "Restart immediately to go back to local when remote_state changes to none from stopped", + common_test.ARGS: "ctrmgrd", + common_test.PRE: { + common_test.STATE_DB_NO: { + common_test.FEATURE_TABLE: { + "snmp": { + "remote_state": "stopped", + } + } + } + }, + common_test.UPD: { + common_test.STATE_DB_NO: { + common_test.FEATURE_TABLE: { + "snmp": { + "remote_state": "none", + } + } + } + }, + common_test.POST: { + common_test.STATE_DB_NO: { + common_test.FEATURE_TABLE: { + "snmp": { + "restart": "true" + } + } + } + } } }