From a39673dcf492c816ad723ee3b8a2ceeb4d6be2db Mon Sep 17 00:00:00 2001 From: shlomibitton <60430976+shlomibitton@users.noreply.github.com> Date: Fri, 2 Jul 2021 00:39:29 +0300 Subject: [PATCH] [flex-counters] Delay flex counters stats init for faster boot time (#1803) What I did Update flex counters DB with counters stats only when counters are enabled. As long as the polling counters are not enabled, flex counters information will stored internally on PortsOrch. Why I did it Creating flex counters objects on the DB will trigger 'SYNCD' to access the HW for query statistics capabilities. This HW access takes time and will be better to finish boot before doing this (mainly for fast-reboot but good to have in general). The flex counters are not crucial at boot time, we can delay it to the end of the boot process. How I verified it Reboot a switch and observer the flex counters DB populated after counters are enabled. Signed-off-by: Shlomi Bitton --- orchagent/flexcounterorch.cpp | 50 +++++++---- orchagent/flexcounterorch.h | 4 + orchagent/orchdaemon.cpp | 6 +- orchagent/portsorch.cpp | 93 +++++++++++++++++--- orchagent/portsorch.h | 8 ++ tests/mock_tests/mock_orchagent_main.h | 3 + tests/mock_tests/portsorch_ut.cpp | 7 +- tests/test_flex_counters.py | 112 +++++++++++++++++++++++++ tests/test_pg_drop_counter.py | 2 + 9 files changed, 255 insertions(+), 30 deletions(-) create mode 100644 tests/test_flex_counters.py diff --git a/orchagent/flexcounterorch.cpp b/orchagent/flexcounterorch.cpp index bf40d7c05f..41f024c639 100644 --- a/orchagent/flexcounterorch.cpp +++ b/orchagent/flexcounterorch.cpp @@ -18,6 +18,11 @@ extern IntfsOrch *gIntfsOrch; extern BufferOrch *gBufferOrch; #define BUFFER_POOL_WATERMARK_KEY "BUFFER_POOL_WATERMARK" +#define PORT_KEY "PORT" +#define PORT_BUFFER_DROP_KEY "PORT_BUFFER_DROP" +#define QUEUE_KEY "QUEUE" +#define PG_WATERMARK_KEY "PG_WATERMARK" +#define RIF_KEY "RIF" unordered_map flexCounterGroupMap = { @@ -102,26 +107,31 @@ void FlexCounterOrch::doTask(Consumer &consumer) // which is automatically satisfied upon the creation of the orch object that requires // the syncd flex counter polling service // This postponement is introduced by design to accelerate the initialization process - // - // generateQueueMap() is called as long as a field "FLEX_COUNTER_STATUS" event is heard, - // regardless of whether the key is "QUEUE" or whether the value is "enable" or "disable" - // This can be because generateQueueMap() installs a fundamental list of queue stats - // that need to be polled. So my doubt here is if queue watermark stats shall be piggybacked - // into the same function as they may not be counted as fundamental - if(gPortsOrch) + if(gPortsOrch && (value == "enable")) { - gPortsOrch->generateQueueMap(); - gPortsOrch->generatePriorityGroupMap(); + if(key == PORT_KEY) + { + gPortsOrch->generatePortCounterMap(); + m_port_counter_enabled = true; + } + else if(key == PORT_BUFFER_DROP_KEY) + { + gPortsOrch->generatePortBufferDropCounterMap(); + m_port_buffer_drop_counter_enabled = true; + } + else if(key == QUEUE_KEY) + { + gPortsOrch->generateQueueMap(); + } + else if(key == PG_WATERMARK_KEY) + { + gPortsOrch->generatePriorityGroupMap(); + } } - if(gPortsOrch) - { - gPortsOrch->generatePriorityGroupMap(); - } - if(gIntfsOrch) + if(gIntfsOrch && (key == RIF_KEY) && (value == "enable")) { gIntfsOrch->generateInterfaceMap(); } - // Install COUNTER_ID_LIST/ATTR_ID_LIST only when hearing buffer pool watermark enable event if (gBufferOrch && (key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable")) { gBufferOrch->generateBufferPoolWatermarkCounterIdList(); @@ -144,3 +154,13 @@ void FlexCounterOrch::doTask(Consumer &consumer) consumer.m_toSync.erase(it++); } } + +bool FlexCounterOrch::getPortCountersState() const +{ + return m_port_counter_enabled; +} + +bool FlexCounterOrch::getPortBufferDropCountersState() const +{ + return m_port_buffer_drop_counter_enabled; +} diff --git a/orchagent/flexcounterorch.h b/orchagent/flexcounterorch.h index 27f6e64ab1..0fb9f70e4b 100644 --- a/orchagent/flexcounterorch.h +++ b/orchagent/flexcounterorch.h @@ -15,10 +15,14 @@ class FlexCounterOrch: public Orch void doTask(Consumer &consumer); FlexCounterOrch(swss::DBConnector *db, std::vector &tableNames); virtual ~FlexCounterOrch(void); + bool getPortCountersState() const; + bool getPortBufferDropCountersState() const; private: std::shared_ptr m_flexCounterDb = nullptr; std::shared_ptr m_flexCounterGroupTable = nullptr; + bool m_port_counter_enabled = false; + bool m_port_buffer_drop_counter_enabled = false; }; #endif diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index f8f00cd225..673c5f71ec 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -358,7 +358,11 @@ bool OrchDaemon::init() CFG_FLEX_COUNTER_TABLE_NAME }; - m_orchList.push_back(new FlexCounterOrch(m_configDb, flex_counter_tables)); + auto* flexCounterOrch = new FlexCounterOrch(m_configDb, flex_counter_tables); + m_orchList.push_back(flexCounterOrch); + + gDirectory.set(flexCounterOrch); + gDirectory.set(gPortsOrch); vector pfc_wd_tables = { CFG_PFC_WD_TABLE_NAME diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index 81866a9ae1..a699ddded1 100755 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -284,9 +284,9 @@ static char* hostif_vlan_tag[] = { PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vector &tableNames, DBConnector *chassisAppDb) : Orch(db, tableNames), m_portStateTable(stateDb, STATE_PORT_TABLE_NAME), - port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true), - port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, true), - queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true) + port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false), + port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, false), + queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false) { SWSS_LOG_ENTER(); @@ -2273,19 +2273,21 @@ bool PortsOrch::initPort(const string &alias, const string &role, const int inde vector fields; fields.push_back(tuple); m_counterTable->set("", fields); + // Install a flex counter for this port to track stats - std::unordered_set counter_stats; - for (const auto& it: port_stat_ids) + auto flex_counters_orch = gDirectory.get(); + /* Delay installing the counters if they are yet enabled + If they are enabled, install the counters immediately */ + if (flex_counters_orch->getPortCountersState()) { - counter_stats.emplace(sai_serialize_port_stat(it)); + auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP); + port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_counter_stats); } - port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, counter_stats); - std::unordered_set port_buffer_drop_stats; - for (const auto& it: port_buffer_drop_stat_ids) + if (flex_counters_orch->getPortBufferDropCountersState()) { - port_buffer_drop_stats.emplace(sai_serialize_port_stat(it)); + auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP); + port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats); } - port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats); PortUpdate update = { p, true }; notify(SUBJECT_TYPE_PORT_CHANGE, static_cast(&update)); @@ -2318,8 +2320,11 @@ void PortsOrch::deInitPort(string alias, sai_object_id_t port_id) p.m_port_id = port_id; /* remove port from flex_counter_table for updating counters */ - port_stat_manager.clearCounterIdList(p.m_port_id); - + auto flex_counters_orch = gDirectory.get(); + if ((flex_counters_orch->getPortCountersState())) + { + port_stat_manager.clearCounterIdList(p.m_port_id); + } /* remove port name map from counter table */ m_counter_db->hdel(COUNTERS_PORT_NAME_MAP, alias); @@ -5072,6 +5077,48 @@ void PortsOrch::generatePriorityGroupMapPerPort(const Port& port) CounterCheckOrch::getInstance().addPort(port); } +void PortsOrch::generatePortCounterMap() +{ + if (m_isPortCounterMapGenerated) + { + return; + } + + auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP); + for (const auto& it: m_portList) + { + // Set counter stats only for PHY ports to ensure syncd will not try to query the counter statistics from the HW for non-PHY ports. + if (it.second.m_type != Port::Type::PHY) + { + continue; + } + port_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_counter_stats); + } + + m_isPortCounterMapGenerated = true; +} + +void PortsOrch::generatePortBufferDropCounterMap() +{ + if (m_isPortBufferDropCounterMapGenerated) + { + return; + } + + auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP); + for (const auto& it: m_portList) + { + // Set counter stats only for PHY ports to ensure syncd will not try to query the counter statistics from the HW for non-PHY ports. + if (it.second.m_type != Port::Type::PHY) + { + continue; + } + port_buffer_drop_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_buffer_drop_stats); + } + + m_isPortBufferDropCounterMapGenerated = true; +} + void PortsOrch::doTask(NotificationConsumer &consumer) { SWSS_LOG_ENTER(); @@ -6232,3 +6279,23 @@ void PortsOrch::voqSyncDelLagMember(Port &lag, Port &port) string key = lag.m_system_lag_info.alias + ":" + port.m_system_port_info.alias; m_tableVoqSystemLagMemberTable->del(key); } + +std::unordered_set PortsOrch::generateCounterStats(const string& type) +{ + std::unordered_set counter_stats; + if (type == PORT_STAT_COUNTER_FLEX_COUNTER_GROUP) + { + for (const auto& it: port_stat_ids) + { + counter_stats.emplace(sai_serialize_port_stat(it)); + } + } + else if (type == PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP) + { + for (const auto& it: port_buffer_drop_stat_ids) + { + counter_stats.emplace(sai_serialize_port_stat(it)); + } + } + return counter_stats; +} diff --git a/orchagent/portsorch.h b/orchagent/portsorch.h index 6a92b9d9dd..c29b59837f 100755 --- a/orchagent/portsorch.h +++ b/orchagent/portsorch.h @@ -13,6 +13,7 @@ #include "gearboxutils.h" #include "saihelper.h" #include "lagid.h" +#include "flexcounterorch.h" #define FCS_LEN 4 @@ -125,6 +126,8 @@ class PortsOrch : public Orch, public Subject void generateQueueMap(); void generatePriorityGroupMap(); + void generatePortCounterMap(); + void generatePortBufferDropCounterMap(); void refreshPortStatus(); bool removeAclTableGroup(const Port &p); @@ -290,6 +293,9 @@ class PortsOrch : public Orch, public Subject bool m_isPriorityGroupMapGenerated = false; void generatePriorityGroupMapPerPort(const Port& port); + bool m_isPortCounterMapGenerated = false; + bool m_isPortBufferDropCounterMapGenerated = false; + bool setPortAutoNeg(sai_object_id_t id, int an); bool setPortFecMode(sai_object_id_t id, int fec); bool setPortInterfaceType(sai_object_id_t id, sai_port_interface_type_t interface_type); @@ -333,6 +339,8 @@ class PortsOrch : public Orch, public Subject void voqSyncDelLagMember(Port &lag, Port &port); unique_ptr m_lagIdAllocator; + std::unordered_set generateCounterStats(const string& type); + }; #endif /* SWSS_PORTSORCH_H */ diff --git a/tests/mock_tests/mock_orchagent_main.h b/tests/mock_tests/mock_orchagent_main.h index 587055b87e..181ebac889 100644 --- a/tests/mock_tests/mock_orchagent_main.h +++ b/tests/mock_tests/mock_orchagent_main.h @@ -15,6 +15,8 @@ #include "vxlanorch.h" #include "policerorch.h" #include "fgnhgorch.h" +#include "flexcounterorch.h" +#include "directory.h" extern int gBatchSize; extern bool gSwssRecord; @@ -42,6 +44,7 @@ extern FdbOrch *gFdbOrch; extern MirrorOrch *gMirrorOrch; extern BufferOrch *gBufferOrch; extern VRFOrch *gVrfOrch; +extern Directory gDirectory; extern sai_acl_api_t *sai_acl_api; extern sai_switch_api_t *sai_switch_api; diff --git a/tests/mock_tests/portsorch_ut.cpp b/tests/mock_tests/portsorch_ut.cpp index 365c5144b5..32b1d373ee 100644 --- a/tests/mock_tests/portsorch_ut.cpp +++ b/tests/mock_tests/portsorch_ut.cpp @@ -146,8 +146,13 @@ namespace portsorch_test ASSERT_EQ(gPortsOrch, nullptr); - gPortsOrch = new PortsOrch(m_app_db.get(), m_state_db.get(), ports_tables, m_chassis_app_db.get()); + vector flex_counter_tables = { + CFG_FLEX_COUNTER_TABLE_NAME + }; + auto* flexCounterOrch = new FlexCounterOrch(m_config_db.get(), flex_counter_tables); + gDirectory.set(flexCounterOrch); + gPortsOrch = new PortsOrch(m_app_db.get(), m_state_db.get(), ports_tables, m_chassis_app_db.get()); vector buffer_tables = { APP_BUFFER_POOL_TABLE_NAME, APP_BUFFER_PROFILE_TABLE_NAME, APP_BUFFER_QUEUE_TABLE_NAME, diff --git a/tests/test_flex_counters.py b/tests/test_flex_counters.py new file mode 100644 index 0000000000..ecdc844572 --- /dev/null +++ b/tests/test_flex_counters.py @@ -0,0 +1,112 @@ +import time +import pytest + +# Counter keys on ConfigDB +PORT_KEY = "PORT" +QUEUE_KEY = "QUEUE" +RIF_KEY = "RIF" +BUFFER_POOL_WATERMARK_KEY = "BUFFER_POOL_WATERMARK" +PORT_BUFFER_DROP_KEY = "PORT_BUFFER_DROP" +PG_WATERMARK_KEY = "PG_WATERMARK" + +# Counter stats on FlexCountersDB +PORT_STAT = "PORT_STAT_COUNTER" +QUEUE_STAT = "QUEUE_STAT_COUNTER" +RIF_STAT = "RIF_STAT_COUNTER" +BUFFER_POOL_WATERMARK_STAT = "BUFFER_POOL_WATERMARK_STAT_COUNTER" +PORT_BUFFER_DROP_STAT = "PORT_BUFFER_DROP_STAT" +PG_WATERMARK_STAT = "PG_WATERMARK_STAT_COUNTER" + +# Counter maps on CountersDB +PORT_MAP = "COUNTERS_PORT_NAME_MAP" +QUEUE_MAP = "COUNTERS_QUEUE_NAME_MAP" +RIF_MAP = "COUNTERS_RIF_NAME_MAP" +BUFFER_POOL_WATERMARK_MAP = "COUNTERS_BUFFER_POOL_NAME_MAP" +PORT_BUFFER_DROP_MAP = "COUNTERS_PORT_NAME_MAP" +PG_WATERMARK_MAP = "COUNTERS_PG_NAME_MAP" + +NUMBER_OF_RETRIES = 10 +CPU_PORT_OID = "0x0" + +counter_type_dict = {"port_counter":[PORT_KEY, PORT_STAT, PORT_MAP], + "queue_counter":[QUEUE_KEY, QUEUE_STAT, QUEUE_MAP], + "rif_counter":[RIF_KEY, RIF_STAT, RIF_MAP], + "buffer_pool_watermark_counter":[BUFFER_POOL_WATERMARK_KEY, BUFFER_POOL_WATERMARK_STAT, BUFFER_POOL_WATERMARK_MAP], + "port_buffer_drop_counter":[PORT_BUFFER_DROP_KEY, PORT_BUFFER_DROP_STAT, PORT_BUFFER_DROP_MAP], + "pg_watermark_counter":[PG_WATERMARK_KEY, PG_WATERMARK_STAT, PG_WATERMARK_MAP]} + +class TestFlexCounters(object): + + def setup_dbs(self, dvs): + self.config_db = dvs.get_config_db() + self.flex_db = dvs.get_flex_db() + self.counters_db = dvs.get_counters_db() + + def wait_for_table(self, table): + for retry in range(NUMBER_OF_RETRIES): + counters_keys = self.counters_db.db_connection.hgetall(table) + if len(counters_keys) > 0: + return + else: + time.sleep(1) + + assert False, str(table) + " not created in Counters DB" + + def wait_for_id_list(self, stat, name, oid): + for retry in range(NUMBER_OF_RETRIES): + id_list = self.flex_db.db_connection.hgetall("FLEX_COUNTER_TABLE:" + stat + ":" + oid).items() + if len(id_list) > 0: + return + else: + time.sleep(1) + + assert False, "No ID list for counter " + str(name) + + def verify_no_flex_counters_tables(self, counter_stat): + counters_stat_keys = self.flex_db.get_keys("FLEX_COUNTER_TABLE:" + counter_stat) + assert len(counters_stat_keys) == 0, "FLEX_COUNTER_TABLE:" + str(counter_stat) + " tables exist before enabling the flex counter group" + + def verify_flex_counters_populated(self, map, stat): + counters_keys = self.counters_db.db_connection.hgetall(map) + for counter_entry in counters_keys.items(): + name = counter_entry[0] + oid = counter_entry[1] + self.wait_for_id_list(stat, name, oid) + + def verify_only_phy_ports_created(self): + port_counters_keys = self.counters_db.db_connection.hgetall(PORT_MAP) + port_counters_stat_keys = self.flex_db.get_keys("FLEX_COUNTER_TABLE:" + PORT_STAT) + for port_stat in port_counters_stat_keys: + assert port_stat in dict(port_counters_keys.items()).values(), "Non PHY port created on PORT_STAT_COUNTER group: {}".format(port_stat) + + def enable_flex_counter_group(self, group, map): + group_stats_entry = {"FLEX_COUNTER_STATUS": "enable"} + self.config_db.create_entry("FLEX_COUNTER_TABLE", group, group_stats_entry) + self.wait_for_table(map) + + @pytest.mark.parametrize("counter_type", counter_type_dict.keys()) + def test_flex_counters(self, dvs, counter_type): + """ + The test will check there are no flex counters tables on FlexCounter DB when the counters are disabled. + After enabling each counter group, the test will check the flow of creating flex counters tables on FlexCounter DB. + For some counter types the MAPS on COUNTERS DB will be created as well after enabling the counter group, this will be also verified on this test. + """ + self.setup_dbs(dvs) + counter_key = counter_type_dict[counter_type][0] + counter_stat = counter_type_dict[counter_type][1] + counter_map = counter_type_dict[counter_type][2] + + self.verify_no_flex_counters_tables(counter_stat) + + if counter_type == "rif_counter": + self.config_db.db_connection.hset('INTERFACE|Ethernet0', "NULL", "NULL") + self.config_db.db_connection.hset('INTERFACE|Ethernet0|192.168.0.1/24', "NULL", "NULL") + + self.enable_flex_counter_group(counter_key, counter_map) + self.verify_flex_counters_populated(counter_map, counter_stat) + + if counter_type == "port_counter": + self.verify_only_phy_ports_created() + + if counter_type == "rif_counter": + self.config_db.db_connection.hdel('INTERFACE|Ethernet0|192.168.0.1/24', "NULL") diff --git a/tests/test_pg_drop_counter.py b/tests/test_pg_drop_counter.py index dedee82c1a..1cdd834747 100644 --- a/tests/test_pg_drop_counter.py +++ b/tests/test_pg_drop_counter.py @@ -65,12 +65,14 @@ def set_up_flex_counter(self): fc_status_enable = {"FLEX_COUNTER_STATUS": "enable"} self.config_db.create_entry("FLEX_COUNTER_TABLE", "PG_DROP", fc_status_enable) + self.config_db.create_entry("FLEX_COUNTER_TABLE", "PG_WATERMARK", fc_status_enable) def clear_flex_counter(self): for pg in self.pgs: self.flex_db.delete_entry("FLEX_COUNTER_TABLE", "PG_DROP_STAT_COUNTER:{}".format(pg)) self.config_db.delete_entry("FLEX_COUNTER_TABLE", "PG_DROP") + self.config_db.delete_entry("FLEX_COUNTER_TABLE", "PG_WATERMARK") def test_pg_drop_counters(self, dvs):