From b0fb3924c47a855926f3f2b3a1911bc46fed3c9c Mon Sep 17 00:00:00 2001 From: shlomibitton <60430976+shlomibitton@users.noreply.github.com> Date: Fri, 2 Jul 2021 00:39:01 +0300 Subject: [PATCH] [flex-counters] [202012] Delay flex counters stats init for faster boot time (#1804) What I did Update flex counters DB with counters stats only when counters are enabled. As long as the polling counters are not enabled, flex counters information will stored internally on PortsOrch. Why I did it Creating flex counters objects on the DB will trigger 'SYNCD' to access the HW for query statistics capabilities. This HW access takes time and will be better to finish boot before doing this (mainly for fast-reboot but good to have in general). The flex counters are not crucial at boot time, we can delay it to the end of the boot process. How I verified it Reboot a switch and observer the flex counters DB populated after counters are enabled. Signed-off-by: Shlomi Bitton --- orchagent/flexcounterorch.cpp | 52 +++++++++--- orchagent/flexcounterorch.h | 4 + orchagent/orchdaemon.cpp | 6 +- orchagent/portsorch.cpp | 92 +++++++++++++++++--- orchagent/portsorch.h | 8 ++ tests/mock_tests/mock_orchagent_main.h | 3 + tests/mock_tests/portsorch_ut.cpp | 8 ++ tests/test_flex_counters.py | 112 +++++++++++++++++++++++++ tests/test_pg_drop_counter.py | 2 + 9 files changed, 262 insertions(+), 25 deletions(-) create mode 100644 tests/test_flex_counters.py diff --git a/orchagent/flexcounterorch.cpp b/orchagent/flexcounterorch.cpp index a103433113..62f53771e4 100644 --- a/orchagent/flexcounterorch.cpp +++ b/orchagent/flexcounterorch.cpp @@ -16,6 +16,11 @@ extern IntfsOrch *gIntfsOrch; extern BufferOrch *gBufferOrch; #define BUFFER_POOL_WATERMARK_KEY "BUFFER_POOL_WATERMARK" +#define PORT_KEY "PORT" +#define PORT_BUFFER_DROP_KEY "PORT_BUFFER_DROP" +#define QUEUE_KEY "QUEUE" +#define PG_WATERMARK_KEY "PG_WATERMARK" +#define RIF_KEY "RIF" unordered_map flexCounterGroupMap = { @@ -95,17 +100,32 @@ void FlexCounterOrch::doTask(Consumer &consumer) // which is automatically satisfied upon the creation of the orch object that requires // the syncd flex counter polling service // This postponement is introduced by design to accelerate the initialization process - // - // generateQueueMap() is called as long as a field "FLEX_COUNTER_STATUS" event is heard, - // regardless of whether the key is "QUEUE" or whether the value is "enable" or "disable" - // This can be because generateQueueMap() installs a fundamental list of queue stats - // that need to be polled. So my doubt here is if queue watermark stats shall be piggybacked - // into the same function as they may not be counted as fundamental - gPortsOrch->generateQueueMap(); - gPortsOrch->generatePriorityGroupMap(); - gIntfsOrch->generateInterfaceMap(); - // Install COUNTER_ID_LIST/ATTR_ID_LIST only when hearing buffer pool watermark enable event - if ((key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable")) + if(gPortsOrch && (value == "enable")) + { + if(key == PORT_KEY) + { + gPortsOrch->generatePortCounterMap(); + m_port_counter_enabled = true; + } + else if(key == PORT_BUFFER_DROP_KEY) + { + gPortsOrch->generatePortBufferDropCounterMap(); + m_port_buffer_drop_counter_enabled = true; + } + else if(key == QUEUE_KEY) + { + gPortsOrch->generateQueueMap(); + } + else if(key == PG_WATERMARK_KEY) + { + gPortsOrch->generatePriorityGroupMap(); + } + } + if(gIntfsOrch && (key == RIF_KEY) && (value == "enable")) + { + gIntfsOrch->generateInterfaceMap(); + } + if (gBufferOrch && (key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable")) { gBufferOrch->generateBufferPoolWatermarkCounterIdList(); } @@ -124,3 +144,13 @@ void FlexCounterOrch::doTask(Consumer &consumer) consumer.m_toSync.erase(it++); } } + +bool FlexCounterOrch::getPortCountersState() const +{ + return m_port_counter_enabled; +} + +bool FlexCounterOrch::getPortBufferDropCountersState() const +{ + return m_port_buffer_drop_counter_enabled; +} diff --git a/orchagent/flexcounterorch.h b/orchagent/flexcounterorch.h index 27f6e64ab1..0fb9f70e4b 100644 --- a/orchagent/flexcounterorch.h +++ b/orchagent/flexcounterorch.h @@ -15,10 +15,14 @@ class FlexCounterOrch: public Orch void doTask(Consumer &consumer); FlexCounterOrch(swss::DBConnector *db, std::vector &tableNames); virtual ~FlexCounterOrch(void); + bool getPortCountersState() const; + bool getPortBufferDropCountersState() const; private: std::shared_ptr m_flexCounterDb = nullptr; std::shared_ptr m_flexCounterGroupTable = nullptr; + bool m_port_counter_enabled = false; + bool m_port_buffer_drop_counter_enabled = false; }; #endif diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 3cc8546d24..eab7ef4d61 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -330,7 +330,11 @@ bool OrchDaemon::init() CFG_FLEX_COUNTER_TABLE_NAME }; - m_orchList.push_back(new FlexCounterOrch(m_configDb, flex_counter_tables)); + auto* flexCounterOrch = new FlexCounterOrch(m_configDb, flex_counter_tables); + m_orchList.push_back(flexCounterOrch); + + gDirectory.set(flexCounterOrch); + gDirectory.set(gPortsOrch); vector pfc_wd_tables = { CFG_PFC_WD_TABLE_NAME diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index 02b2fd784b..126073cdbf 100755 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -226,9 +226,9 @@ static char* hostif_vlan_tag[] = { */ PortsOrch::PortsOrch(DBConnector *db, vector &tableNames) : Orch(db, tableNames), - port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true), - port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, true), - queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true) + port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false), + port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, false), + queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false) { SWSS_LOG_ENTER(); @@ -2012,19 +2012,21 @@ bool PortsOrch::initPort(const string &alias, const int index, const set &l vector fields; fields.push_back(tuple); m_counterTable->set("", fields); + // Install a flex counter for this port to track stats - std::unordered_set counter_stats; - for (const auto& it: port_stat_ids) + auto flex_counters_orch = gDirectory.get(); + /* Delay installing the counters if they are yet enabled + If they are enabled, install the counters immediately */ + if (flex_counters_orch->getPortCountersState()) { - counter_stats.emplace(sai_serialize_port_stat(it)); + auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP); + port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_counter_stats); } - port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, counter_stats); - std::unordered_set port_buffer_drop_stats; - for (const auto& it: port_buffer_drop_stat_ids) + if (flex_counters_orch->getPortBufferDropCountersState()) { - port_buffer_drop_stats.emplace(sai_serialize_port_stat(it)); + auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP); + port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats); } - port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats); PortUpdate update = { p, true }; notify(SUBJECT_TYPE_PORT_CHANGE, static_cast(&update)); @@ -2057,8 +2059,11 @@ void PortsOrch::deInitPort(string alias, sai_object_id_t port_id) p.m_port_id = port_id; /* remove port from flex_counter_table for updating counters */ - port_stat_manager.clearCounterIdList(p.m_port_id); - + auto flex_counters_orch = gDirectory.get(); + if ((flex_counters_orch->getPortCountersState())) + { + port_stat_manager.clearCounterIdList(p.m_port_id); + } /* remove port name map from counter table */ m_counter_db->hdel(COUNTERS_PORT_NAME_MAP, alias); @@ -4375,6 +4380,48 @@ void PortsOrch::generatePriorityGroupMapPerPort(const Port& port) CounterCheckOrch::getInstance().addPort(port); } +void PortsOrch::generatePortCounterMap() +{ + if (m_isPortCounterMapGenerated) + { + return; + } + + auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP); + for (const auto& it: m_portList) + { + // Set counter stats only for PHY ports to ensure syncd will not try to query the counter statistics from the HW for non-PHY ports. + if (it.second.m_type != Port::Type::PHY) + { + continue; + } + port_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_counter_stats); + } + + m_isPortCounterMapGenerated = true; +} + +void PortsOrch::generatePortBufferDropCounterMap() +{ + if (m_isPortBufferDropCounterMapGenerated) + { + return; + } + + auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP); + for (const auto& it: m_portList) + { + // Set counter stats only for PHY ports to ensure syncd will not try to query the counter statistics from the HW for non-PHY ports. + if (it.second.m_type != Port::Type::PHY) + { + continue; + } + port_buffer_drop_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_buffer_drop_stats); + } + + m_isPortBufferDropCounterMapGenerated = true; +} + void PortsOrch::doTask(NotificationConsumer &consumer) { SWSS_LOG_ENTER(); @@ -4958,3 +5005,22 @@ bool PortsOrch::initGearboxPort(Port &port) return true; } +std::unordered_set PortsOrch::generateCounterStats(const string& type) +{ + std::unordered_set counter_stats; + if (type == PORT_STAT_COUNTER_FLEX_COUNTER_GROUP) + { + for (const auto& it: port_stat_ids) + { + counter_stats.emplace(sai_serialize_port_stat(it)); + } + } + else if (type == PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP) + { + for (const auto& it: port_buffer_drop_stat_ids) + { + counter_stats.emplace(sai_serialize_port_stat(it)); + } + } + return counter_stats; +} diff --git a/orchagent/portsorch.h b/orchagent/portsorch.h index f6e9e4ac22..3c4d23fca9 100755 --- a/orchagent/portsorch.h +++ b/orchagent/portsorch.h @@ -12,6 +12,7 @@ #include "flex_counter_manager.h" #include "gearboxutils.h" #include "saihelper.h" +#include "flexcounterorch.h" #define FCS_LEN 4 @@ -123,6 +124,8 @@ class PortsOrch : public Orch, public Subject void generateQueueMap(); void generatePriorityGroupMap(); + void generatePortCounterMap(); + void generatePortBufferDropCounterMap(); void refreshPortStatus(); bool removeAclTableGroup(const Port &p); @@ -277,6 +280,9 @@ class PortsOrch : public Orch, public Subject bool m_isPriorityGroupMapGenerated = false; void generatePriorityGroupMapPerPort(const Port& port); + bool m_isPortCounterMapGenerated = false; + bool m_isPortBufferDropCounterMapGenerated = false; + bool setPortAutoNeg(sai_object_id_t id, int an); bool setPortFecMode(sai_object_id_t id, int fec); @@ -296,6 +302,8 @@ class PortsOrch : public Orch, public Subject void initGearbox(); bool initGearboxPort(Port &port); + std::unordered_set generateCounterStats(const string& type); + }; #endif /* SWSS_PORTSORCH_H */ diff --git a/tests/mock_tests/mock_orchagent_main.h b/tests/mock_tests/mock_orchagent_main.h index 587055b87e..181ebac889 100644 --- a/tests/mock_tests/mock_orchagent_main.h +++ b/tests/mock_tests/mock_orchagent_main.h @@ -15,6 +15,8 @@ #include "vxlanorch.h" #include "policerorch.h" #include "fgnhgorch.h" +#include "flexcounterorch.h" +#include "directory.h" extern int gBatchSize; extern bool gSwssRecord; @@ -42,6 +44,7 @@ extern FdbOrch *gFdbOrch; extern MirrorOrch *gMirrorOrch; extern BufferOrch *gBufferOrch; extern VRFOrch *gVrfOrch; +extern Directory gDirectory; extern sai_acl_api_t *sai_acl_api; extern sai_switch_api_t *sai_switch_api; diff --git a/tests/mock_tests/portsorch_ut.cpp b/tests/mock_tests/portsorch_ut.cpp index a52b505b06..27c26f2ba3 100644 --- a/tests/mock_tests/portsorch_ut.cpp +++ b/tests/mock_tests/portsorch_ut.cpp @@ -142,6 +142,14 @@ namespace portsorch_test }; ASSERT_EQ(gPortsOrch, nullptr); + + vector flex_counter_tables = { + CFG_FLEX_COUNTER_TABLE_NAME + }; + auto* flexCounterOrch = new FlexCounterOrch(m_config_db.get(), flex_counter_tables); + + gDirectory.set(flexCounterOrch); + gPortsOrch = new PortsOrch(m_app_db.get(), ports_tables); vector buffer_tables = { APP_BUFFER_POOL_TABLE_NAME, APP_BUFFER_PROFILE_TABLE_NAME, diff --git a/tests/test_flex_counters.py b/tests/test_flex_counters.py new file mode 100644 index 0000000000..ecdc844572 --- /dev/null +++ b/tests/test_flex_counters.py @@ -0,0 +1,112 @@ +import time +import pytest + +# Counter keys on ConfigDB +PORT_KEY = "PORT" +QUEUE_KEY = "QUEUE" +RIF_KEY = "RIF" +BUFFER_POOL_WATERMARK_KEY = "BUFFER_POOL_WATERMARK" +PORT_BUFFER_DROP_KEY = "PORT_BUFFER_DROP" +PG_WATERMARK_KEY = "PG_WATERMARK" + +# Counter stats on FlexCountersDB +PORT_STAT = "PORT_STAT_COUNTER" +QUEUE_STAT = "QUEUE_STAT_COUNTER" +RIF_STAT = "RIF_STAT_COUNTER" +BUFFER_POOL_WATERMARK_STAT = "BUFFER_POOL_WATERMARK_STAT_COUNTER" +PORT_BUFFER_DROP_STAT = "PORT_BUFFER_DROP_STAT" +PG_WATERMARK_STAT = "PG_WATERMARK_STAT_COUNTER" + +# Counter maps on CountersDB +PORT_MAP = "COUNTERS_PORT_NAME_MAP" +QUEUE_MAP = "COUNTERS_QUEUE_NAME_MAP" +RIF_MAP = "COUNTERS_RIF_NAME_MAP" +BUFFER_POOL_WATERMARK_MAP = "COUNTERS_BUFFER_POOL_NAME_MAP" +PORT_BUFFER_DROP_MAP = "COUNTERS_PORT_NAME_MAP" +PG_WATERMARK_MAP = "COUNTERS_PG_NAME_MAP" + +NUMBER_OF_RETRIES = 10 +CPU_PORT_OID = "0x0" + +counter_type_dict = {"port_counter":[PORT_KEY, PORT_STAT, PORT_MAP], + "queue_counter":[QUEUE_KEY, QUEUE_STAT, QUEUE_MAP], + "rif_counter":[RIF_KEY, RIF_STAT, RIF_MAP], + "buffer_pool_watermark_counter":[BUFFER_POOL_WATERMARK_KEY, BUFFER_POOL_WATERMARK_STAT, BUFFER_POOL_WATERMARK_MAP], + "port_buffer_drop_counter":[PORT_BUFFER_DROP_KEY, PORT_BUFFER_DROP_STAT, PORT_BUFFER_DROP_MAP], + "pg_watermark_counter":[PG_WATERMARK_KEY, PG_WATERMARK_STAT, PG_WATERMARK_MAP]} + +class TestFlexCounters(object): + + def setup_dbs(self, dvs): + self.config_db = dvs.get_config_db() + self.flex_db = dvs.get_flex_db() + self.counters_db = dvs.get_counters_db() + + def wait_for_table(self, table): + for retry in range(NUMBER_OF_RETRIES): + counters_keys = self.counters_db.db_connection.hgetall(table) + if len(counters_keys) > 0: + return + else: + time.sleep(1) + + assert False, str(table) + " not created in Counters DB" + + def wait_for_id_list(self, stat, name, oid): + for retry in range(NUMBER_OF_RETRIES): + id_list = self.flex_db.db_connection.hgetall("FLEX_COUNTER_TABLE:" + stat + ":" + oid).items() + if len(id_list) > 0: + return + else: + time.sleep(1) + + assert False, "No ID list for counter " + str(name) + + def verify_no_flex_counters_tables(self, counter_stat): + counters_stat_keys = self.flex_db.get_keys("FLEX_COUNTER_TABLE:" + counter_stat) + assert len(counters_stat_keys) == 0, "FLEX_COUNTER_TABLE:" + str(counter_stat) + " tables exist before enabling the flex counter group" + + def verify_flex_counters_populated(self, map, stat): + counters_keys = self.counters_db.db_connection.hgetall(map) + for counter_entry in counters_keys.items(): + name = counter_entry[0] + oid = counter_entry[1] + self.wait_for_id_list(stat, name, oid) + + def verify_only_phy_ports_created(self): + port_counters_keys = self.counters_db.db_connection.hgetall(PORT_MAP) + port_counters_stat_keys = self.flex_db.get_keys("FLEX_COUNTER_TABLE:" + PORT_STAT) + for port_stat in port_counters_stat_keys: + assert port_stat in dict(port_counters_keys.items()).values(), "Non PHY port created on PORT_STAT_COUNTER group: {}".format(port_stat) + + def enable_flex_counter_group(self, group, map): + group_stats_entry = {"FLEX_COUNTER_STATUS": "enable"} + self.config_db.create_entry("FLEX_COUNTER_TABLE", group, group_stats_entry) + self.wait_for_table(map) + + @pytest.mark.parametrize("counter_type", counter_type_dict.keys()) + def test_flex_counters(self, dvs, counter_type): + """ + The test will check there are no flex counters tables on FlexCounter DB when the counters are disabled. + After enabling each counter group, the test will check the flow of creating flex counters tables on FlexCounter DB. + For some counter types the MAPS on COUNTERS DB will be created as well after enabling the counter group, this will be also verified on this test. + """ + self.setup_dbs(dvs) + counter_key = counter_type_dict[counter_type][0] + counter_stat = counter_type_dict[counter_type][1] + counter_map = counter_type_dict[counter_type][2] + + self.verify_no_flex_counters_tables(counter_stat) + + if counter_type == "rif_counter": + self.config_db.db_connection.hset('INTERFACE|Ethernet0', "NULL", "NULL") + self.config_db.db_connection.hset('INTERFACE|Ethernet0|192.168.0.1/24', "NULL", "NULL") + + self.enable_flex_counter_group(counter_key, counter_map) + self.verify_flex_counters_populated(counter_map, counter_stat) + + if counter_type == "port_counter": + self.verify_only_phy_ports_created() + + if counter_type == "rif_counter": + self.config_db.db_connection.hdel('INTERFACE|Ethernet0|192.168.0.1/24', "NULL") diff --git a/tests/test_pg_drop_counter.py b/tests/test_pg_drop_counter.py index dedee82c1a..1cdd834747 100644 --- a/tests/test_pg_drop_counter.py +++ b/tests/test_pg_drop_counter.py @@ -65,12 +65,14 @@ def set_up_flex_counter(self): fc_status_enable = {"FLEX_COUNTER_STATUS": "enable"} self.config_db.create_entry("FLEX_COUNTER_TABLE", "PG_DROP", fc_status_enable) + self.config_db.create_entry("FLEX_COUNTER_TABLE", "PG_WATERMARK", fc_status_enable) def clear_flex_counter(self): for pg in self.pgs: self.flex_db.delete_entry("FLEX_COUNTER_TABLE", "PG_DROP_STAT_COUNTER:{}".format(pg)) self.config_db.delete_entry("FLEX_COUNTER_TABLE", "PG_DROP") + self.config_db.delete_entry("FLEX_COUNTER_TABLE", "PG_WATERMARK") def test_pg_drop_counters(self, dvs):