Add PFC watchdog support for the cisco-8000 platform.

* Makefile.am installs pfc_detect_cisco-8000.lua and pfc_restore_cisco-8000.lua.
* orch.h defines CISCO_8000_PLATFORM_SUBSTRING.
* orchdaemon.cpp creates a PfcWdSwOrch for cisco-8000 that polls
  SAI_QUEUE_STAT_PACKETS and SAI_QUEUE_ATTR_PAUSE_STATUS.
* The two Lua plugins decide storm/restore from SAI_QUEUE_ATTR_PAUSE_STATUS
  and publish the decision on the PFC_WD_ACTION channel.
* PfcWdSaiDlrInitHandler sets SAI_QUEUE_ATTR_PFC_DLR_INIT to true when it is
  constructed and back to false when it is destroyed.
* PfcWdOrch caches the "platform" environment variable in m_platform and
  rejects the forward action on cisco-8000.

NOTE(review): this copy of the patch had lost its line breaks, its leading
whitespace and every <...> template argument list. Line structure follows the
+/- markers and hunk counts; indentation and template arguments were
reconstructed from context and must be checked against the tree before
applying. Blob ids on the index lines were not recomputed.

diff --git a/orchagent/Makefile.am b/orchagent/Makefile.am
index d4f3627203..94d66e0024 100644
--- a/orchagent/Makefile.am
+++ b/orchagent/Makefile.am
@@ -16,7 +16,9 @@ dist_swss_DATA = \
     pfc_detect_broadcom.lua \
     pfc_detect_barefoot.lua \
     pfc_detect_nephos.lua \
+    pfc_detect_cisco-8000.lua \
     pfc_restore.lua \
+    pfc_restore_cisco-8000.lua \
     port_rates.lua \
     watermark_queue.lua \
     watermark_pg.lua \
diff --git a/orchagent/orch.h b/orchagent/orch.h
index 7fe99cc6ac..9801f09f7d 100644
--- a/orchagent/orch.h
+++ b/orchagent/orch.h
@@ -37,6 +37,7 @@ const char state_db_key_delimiter = '|';
 #define VS_PLATFORM_SUBSTRING "vs"
 #define NPS_PLATFORM_SUBSTRING "nephos"
 #define MRVL_PLATFORM_SUBSTRING "marvell"
+#define CISCO_8000_PLATFORM_SUBSTRING "cisco-8000"
 
 #define CONFIGDB_KEY_SEPARATOR "|"
 #define DEFAULT_KEY_SEPARATOR ":"
diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp
index 814729ee0c..7883b9058e 100644
--- a/orchagent/orchdaemon.cpp
+++ b/orchagent/orchdaemon.cpp
@@ -540,6 +540,27 @@ bool OrchDaemon::init()
                     queueStatIds,
                     queueAttrIds,
                     PFC_WD_POLL_MSECS));
+    } else if (platform == CISCO_8000_PLATFORM_SUBSTRING)
+    {
+        static const vector<sai_port_stat_t> portStatIds;
+
+        static const vector<sai_queue_stat_t> queueStatIds =
+        {
+            SAI_QUEUE_STAT_PACKETS,
+        };
+
+        static const vector<sai_queue_attr_t> queueAttrIds =
+        {
+            SAI_QUEUE_ATTR_PAUSE_STATUS,
+        };
+
+        m_orchList.push_back(new PfcWdSwOrch<PfcWdSaiDlrInitHandler, PfcWdActionHandler>(
+                    m_configDb,
+                    pfc_wd_tables,
+                    portStatIds,
+                    queueStatIds,
+                    queueAttrIds,
+                    PFC_WD_POLL_MSECS));
     }
 
     m_orchList.push_back(&CounterCheckOrch::getInstance(m_configDb));
diff --git a/orchagent/pfc_detect_cisco-8000.lua b/orchagent/pfc_detect_cisco-8000.lua
new file mode 100644
index 0000000000..a76d6c3c68
--- /dev/null
+++ b/orchagent/pfc_detect_cisco-8000.lua
@@ -0,0 +1,70 @@
+-- KEYS - queue IDs
+-- ARGV[1] - counters db index
+-- ARGV[2] - counters table name
+-- ARGV[3] - poll time interval (milliseconds)
+-- returns an empty table; storm/restore events are published on PFC_WD_ACTION
+
+local counters_db = ARGV[1]
+local counters_table_name = ARGV[2]
+local poll_time = tonumber(ARGV[3]) * 1000
+
+local rets = {}
+
+redis.call('SELECT', counters_db)
+
+-- Iterate through each queue
+local n = table.getn(KEYS)
+for i = n, 1, -1 do
+    local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
+    local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
+    local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
+    if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then
+        local detection_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME')
+        if detection_time then
+            detection_time = tonumber(detection_time)
+            local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT')
+            if not time_left then
+                time_left = detection_time
+            else
+                time_left = tonumber(time_left)
+            end
+
+            -- Get PFC status
+            local packets = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS')
+            local queue_pause_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS')
+
+            if packets and queue_pause_status then
+
+                -- DEBUG CODE START. Active by default: DEBUG_STORM == "enabled" forces a storm.
+                local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
+                -- DEBUG CODE END.
+
+                -- Check actual condition of queue being in PFC storm
+                if (queue_pause_status == 'true')
+                    -- DEBUG CODE START. Active by default (see debug_storm above).
+                    or (debug_storm == "enabled")
+                    -- DEBUG CODE END.
+                then
+                    if time_left <= poll_time then
+                        redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
+                        time_left = detection_time
+                    else
+                        time_left = time_left - poll_time
+                    end
+                else
+                    if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
+                        redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
+                    end
+                    time_left = detection_time
+                end
+
+                -- Save values for next run
+                redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
+                redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last', queue_pause_status)
+                redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
+            end
+        end
+    end
+end
+
+return rets
diff --git a/orchagent/pfc_restore_cisco-8000.lua b/orchagent/pfc_restore_cisco-8000.lua
new file mode 100644
index 0000000000..686de0464b
--- /dev/null
+++ b/orchagent/pfc_restore_cisco-8000.lua
@@ -0,0 +1,60 @@
+-- KEYS - queue IDs
+-- ARGV[1] - counters db index
+-- ARGV[2] - counters table name
+-- ARGV[3] - poll time interval (milliseconds)
+-- returns an empty table; restore events are published on PFC_WD_ACTION
+
+local counters_db = ARGV[1]
+local counters_table_name = ARGV[2]
+local poll_time = tonumber(ARGV[3]) * 1000
+
+local rets = {}
+
+redis.call('SELECT', counters_db)
+
+-- Iterate through each queue
+local n = table.getn(KEYS)
+for i = n, 1, -1 do
+    local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
+    local restoration_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME')
+    local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
+    local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE')
+    if not big_red_switch_mode and pfc_wd_status ~= 'operational' and pfc_wd_action ~= 'alert' and restoration_time and restoration_time ~= '' then
+        restoration_time = tonumber(restoration_time)
+        local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME_LEFT')
+        if not time_left then
+            time_left = restoration_time
+        else
+            time_left = tonumber(time_left)
+        end
+
+        -- DEBUG CODE START. Active by default: DEBUG_STORM == "enabled" blocks restore.
+        local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
+        -- DEBUG CODE END.
+
+        -- Check actual condition of queue being restored from PFC storm
+        local queue_pause_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS')
+
+        if (queue_pause_status == 'false')
+            -- DEBUG CODE START. Active by default (see debug_storm above).
+            and (debug_storm ~= "enabled")
+            -- DEBUG CODE END.
+        then
+            -- NOTE(review): pfc_detect_cisco-8000.lua fires at time_left <= poll_time;
+            -- this fires one poll later (time_left <= 0) -- confirm that is intended.
+            if time_left <= 0 then
+                redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
+                time_left = restoration_time
+            else
+                time_left = time_left - poll_time
+            end
+        else
+            time_left = restoration_time
+        end
+
+        -- Save values for next run
+        redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME_LEFT', time_left)
+    end
+end
+
+return rets
diff --git a/orchagent/pfcactionhandler.cpp b/orchagent/pfcactionhandler.cpp
index cf43f57d08..12072480f5 100644
--- a/orchagent/pfcactionhandler.cpp
+++ b/orchagent/pfcactionhandler.cpp
@@ -219,6 +219,82 @@ void PfcWdActionHandler::updateWdCounters(const string& queueIdStr, const PfcWdQ
     m_countersTable->set(queueIdStr, resultFvValues);
 }
 
+PfcWdSaiDlrInitHandler::PfcWdSaiDlrInitHandler(sai_object_id_t port, sai_object_id_t queue,
+        uint8_t queueId, shared_ptr<Table> countersTable):
+    PfcWdActionHandler(port, queue, queueId, countersTable)
+{
+    SWSS_LOG_ENTER();
+
+    sai_attribute_t attr;
+    attr.id = SAI_QUEUE_ATTR_PFC_DLR_INIT;
+    attr.value.booldata = true;
+
+    // Set DLR init to true to start PFC deadlock recovery
+    sai_status_t status = sai_queue_api->set_queue_attribute(queue, &attr);
+    if (status != SAI_STATUS_SUCCESS)
+    {
+        SWSS_LOG_ERROR("Failed to set PFC DLR INIT on port 0x%" PRIx64 " queue 0x%" PRIx64
+                       " queueId %d : %d",
+                       port, queue, queueId, status);
+        return;
+    }
+}
+
+PfcWdSaiDlrInitHandler::~PfcWdSaiDlrInitHandler(void)
+{
+    SWSS_LOG_ENTER();
+
+    sai_object_id_t port = getPort();
+    sai_object_id_t queue = getQueue();
+    uint8_t queueId = getQueueId();
+
+    sai_attribute_t attr;
+    attr.id = SAI_QUEUE_ATTR_PFC_DLR_INIT;
+    attr.value.booldata = false;
+
+    // Set DLR init to false to stop PFC deadlock recovery
+    sai_status_t status = sai_queue_api->set_queue_attribute(queue, &attr);
+    if (status != SAI_STATUS_SUCCESS)
+    {
+        SWSS_LOG_ERROR("Failed to clear PFC DLR INIT on port 0x%" PRIx64 " queue 0x%" PRIx64
+                       " queueId %d : %d", port, queue, queueId, status);
+        return;
+    }
+}
+
+bool PfcWdSaiDlrInitHandler::getHwCounters(PfcWdHwStats& counters)
+{
+    SWSS_LOG_ENTER();
+
+    static const vector<sai_stat_id_t> queueStatIds =
+    {
+        SAI_QUEUE_STAT_PACKETS,
+        SAI_QUEUE_STAT_DROPPED_PACKETS,
+    };
+
+    vector<uint64_t> queueStats;
+    queueStats.resize(queueStatIds.size());
+
+    sai_status_t status = sai_queue_api->get_queue_stats(
+            getQueue(),
+            static_cast<uint32_t>(queueStatIds.size()),
+            queueStatIds.data(),
+            queueStats.data());
+
+    if (status != SAI_STATUS_SUCCESS)
+    {
+        SWSS_LOG_ERROR("Failed to fetch queue 0x%" PRIx64 " stats: %d", getQueue(), status);
+        return false;
+    }
+
+    counters.txPkt = queueStats[0];
+    counters.txDropPkt = queueStats[1];
+    counters.rxPkt = 0;
+    counters.rxDropPkt = 0;
+
+    return true;
+}
+
 PfcWdAclHandler::PfcWdAclHandler(sai_object_id_t port, sai_object_id_t queue,
         uint8_t queueId, shared_ptr<Table> countersTable):
     PfcWdLossyHandler(port, queue, queueId, countersTable)
diff --git a/orchagent/pfcactionhandler.h b/orchagent/pfcactionhandler.h
index e381a798c6..381f9bdca8 100644
--- a/orchagent/pfcactionhandler.h
+++ b/orchagent/pfcactionhandler.h
@@ -163,4 +163,15 @@ class PfcWdZeroBufferHandler: public PfcWdLossyHandler
         sai_object_id_t m_originalPgBufferProfile = SAI_NULL_OBJECT_ID;
 };
 
+// PFC queue that implements drop action by draining queue via SAI
+// attribute SAI_QUEUE_ATTR_PFC_DLR_INIT.
+class PfcWdSaiDlrInitHandler: public PfcWdActionHandler
+{
+    public:
+        PfcWdSaiDlrInitHandler(sai_object_id_t port, sai_object_id_t queue,
+                uint8_t queueId, shared_ptr<Table> countersTable);
+        virtual ~PfcWdSaiDlrInitHandler(void);
+        virtual bool getHwCounters(PfcWdHwStats& counters);
+};
+
 #endif
diff --git a/orchagent/pfcwdorch.cpp b/orchagent/pfcwdorch.cpp
index ca37a85be2..be4c1e51c4 100644
--- a/orchagent/pfcwdorch.cpp
+++ b/orchagent/pfcwdorch.cpp
@@ -36,9 +36,15 @@ template <typename DropHandler, typename ForwardHandler>
 PfcWdOrch<DropHandler, ForwardHandler>::PfcWdOrch(DBConnector *db, vector<string> &tableNames):
     Orch(db, tableNames),
     m_countersDb(new DBConnector("COUNTERS_DB", 0)),
-    m_countersTable(new Table(m_countersDb.get(), COUNTERS_TABLE))
+    m_countersTable(new Table(m_countersDb.get(), COUNTERS_TABLE)),
+    m_platform(getenv("platform") ? getenv("platform") : "")
 {
     SWSS_LOG_ENTER();
+    if (m_platform == "")
+    {
+        SWSS_LOG_ERROR("Platform environment variable is not defined");
+        return;
+    }
 }
 
 
@@ -219,6 +225,10 @@ task_process_status PfcWdOrch<DropHandler, ForwardHandler>::createEntry(const st
                     SWSS_LOG_ERROR("Invalid PFC Watchdog action %s", value.c_str());
                     return task_process_status::task_invalid_entry;
                 }
+                if ((m_platform == CISCO_8000_PLATFORM_SUBSTRING) && (action == PfcWdAction::PFC_WD_ACTION_FORWARD)) {
+                    SWSS_LOG_ERROR("Unsupported action %s for platform %s", value.c_str(), m_platform.c_str());
+                    return task_process_status::task_invalid_entry;
+                }
             }
             else
             {
@@ -657,16 +667,21 @@ PfcWdSwOrch<DropHandler, ForwardHandler>::PfcWdSwOrch(
 {
     SWSS_LOG_ENTER();
 
-    string platform = getenv("platform") ? getenv("platform") : "";
-    if (platform == "")
-    {
-        SWSS_LOG_ERROR("Platform environment variable is not defined");
-        return;
-    }
+    // PfcWdOrch's constructor already logged the missing platform, but its
+    // return cannot stop this constructor, so keep the early exit here.
+    if (this->m_platform.empty())
+    {
+        return;
+    }
 
     string detectSha, restoreSha;
-    string detectPluginName = "pfc_detect_" + platform + ".lua";
-    string restorePluginName = "pfc_restore.lua";
+    string detectPluginName = "pfc_detect_" + this->m_platform + ".lua";
+    string restorePluginName;
+    if (this->m_platform == CISCO_8000_PLATFORM_SUBSTRING) {
+        restorePluginName = "pfc_restore_" + this->m_platform + ".lua";
+    } else {
+        restorePluginName = "pfc_restore.lua";
+    }
 
     try
     {
@@ -1056,3 +1071,4 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::bake()
 // Trick to keep member functions in a separate file
 template class PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>;
 template class PfcWdSwOrch<PfcWdAclHandler, PfcWdLossyHandler>;
+template class PfcWdSwOrch<PfcWdSaiDlrInitHandler, PfcWdActionHandler>;
diff --git a/orchagent/pfcwdorch.h b/orchagent/pfcwdorch.h
index 1f56d28c3a..4013ab9ad5 100644
--- a/orchagent/pfcwdorch.h
+++ b/orchagent/pfcwdorch.h
@@ -52,6 +52,7 @@ class PfcWdOrch: public Orch
 
 protected:
     virtual bool startWdActionOnQueue(const string &event, sai_object_id_t queueId) = 0;
+    string m_platform = "";
 
 private:
 