From e56c492ea7a3a49eed5799d3f4145d60d5557851 Mon Sep 17 00:00:00 2001 From: shlomibitton <60430976+shlomibitton@users.noreply.github.com> Date: Mon, 14 Jun 2021 05:37:15 +0300 Subject: [PATCH] [flex-counters] Delay flex counters stats init for faster boot time (#1749) **What I did** Update flex counters DB with counters stats only when counters are enabled. As long as the polling counters are not enabled, flex counters information will stored internally on PortsOrch. **Why I did it** Creating flex counters objects on the DB will trigger 'SYNCD' to access the HW for query statistics capabilities. This HW access takes time and will be better to finish boot before doing this (mainly for fast-reboot but good to have in general). The flex counters are not crucial at boot time, we can delay it to the end of the boot process. **How I verified it** Reboot a switch and observer the flex counters DB populated after counters are enabled. --- orchagent/flexcounterorch.cpp | 50 +++++++---- orchagent/flexcounterorch.h | 4 + orchagent/orchdaemon.cpp | 6 +- orchagent/portsorch.cpp | 87 ++++++++++++++++--- orchagent/portsorch.h | 8 ++ tests/mock_tests/mock_orchagent_main.h | 3 + tests/mock_tests/portsorch_ut.cpp | 7 +- tests/test_flex_counters.py | 110 +++++++++++++++++++++++++ tests/test_pg_drop_counter.py | 2 + 9 files changed, 247 insertions(+), 30 deletions(-) create mode 100644 tests/test_flex_counters.py diff --git a/orchagent/flexcounterorch.cpp b/orchagent/flexcounterorch.cpp index bf40d7c05f..41f024c639 100644 --- a/orchagent/flexcounterorch.cpp +++ b/orchagent/flexcounterorch.cpp @@ -18,6 +18,11 @@ extern IntfsOrch *gIntfsOrch; extern BufferOrch *gBufferOrch; #define BUFFER_POOL_WATERMARK_KEY "BUFFER_POOL_WATERMARK" +#define PORT_KEY "PORT" +#define PORT_BUFFER_DROP_KEY "PORT_BUFFER_DROP" +#define QUEUE_KEY "QUEUE" +#define PG_WATERMARK_KEY "PG_WATERMARK" +#define RIF_KEY "RIF" unordered_map flexCounterGroupMap = { @@ -102,26 +107,31 @@ void FlexCounterOrch::doTask(Consumer &consumer) // which is automatically satisfied upon the creation of the orch object that requires // the syncd flex counter polling service // This postponement is introduced by design to accelerate the initialization process - // - // generateQueueMap() is called as long as a field "FLEX_COUNTER_STATUS" event is heard, - // regardless of whether the key is "QUEUE" or whether the value is "enable" or "disable" - // This can be because generateQueueMap() installs a fundamental list of queue stats - // that need to be polled. So my doubt here is if queue watermark stats shall be piggybacked - // into the same function as they may not be counted as fundamental - if(gPortsOrch) + if(gPortsOrch && (value == "enable")) { - gPortsOrch->generateQueueMap(); - gPortsOrch->generatePriorityGroupMap(); + if(key == PORT_KEY) + { + gPortsOrch->generatePortCounterMap(); + m_port_counter_enabled = true; + } + else if(key == PORT_BUFFER_DROP_KEY) + { + gPortsOrch->generatePortBufferDropCounterMap(); + m_port_buffer_drop_counter_enabled = true; + } + else if(key == QUEUE_KEY) + { + gPortsOrch->generateQueueMap(); + } + else if(key == PG_WATERMARK_KEY) + { + gPortsOrch->generatePriorityGroupMap(); + } } - if(gPortsOrch) - { - gPortsOrch->generatePriorityGroupMap(); - } - if(gIntfsOrch) + if(gIntfsOrch && (key == RIF_KEY) && (value == "enable")) { gIntfsOrch->generateInterfaceMap(); } - // Install COUNTER_ID_LIST/ATTR_ID_LIST only when hearing buffer pool watermark enable event if (gBufferOrch && (key == BUFFER_POOL_WATERMARK_KEY) && (value == "enable")) { gBufferOrch->generateBufferPoolWatermarkCounterIdList(); @@ -144,3 +154,13 @@ void FlexCounterOrch::doTask(Consumer &consumer) consumer.m_toSync.erase(it++); } } + +bool FlexCounterOrch::getPortCountersState() const +{ + return m_port_counter_enabled; +} + +bool FlexCounterOrch::getPortBufferDropCountersState() const +{ + return m_port_buffer_drop_counter_enabled; +} diff --git a/orchagent/flexcounterorch.h b/orchagent/flexcounterorch.h index 27f6e64ab1..0fb9f70e4b 100644 --- a/orchagent/flexcounterorch.h +++ b/orchagent/flexcounterorch.h @@ -15,10 +15,14 @@ class FlexCounterOrch: public Orch void doTask(Consumer &consumer); FlexCounterOrch(swss::DBConnector *db, std::vector &tableNames); virtual ~FlexCounterOrch(void); + bool getPortCountersState() const; + bool getPortBufferDropCountersState() const; private: std::shared_ptr m_flexCounterDb = nullptr; std::shared_ptr m_flexCounterGroupTable = nullptr; + bool m_port_counter_enabled = false; + bool m_port_buffer_drop_counter_enabled = false; }; #endif diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 4c9fc3a4e7..9acb7b2c01 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -352,7 +352,11 @@ bool OrchDaemon::init() CFG_FLEX_COUNTER_TABLE_NAME }; - m_orchList.push_back(new FlexCounterOrch(m_configDb, flex_counter_tables)); + auto* flexCounterOrch = new FlexCounterOrch(m_configDb, flex_counter_tables); + m_orchList.push_back(flexCounterOrch); + + gDirectory.set(flexCounterOrch); + gDirectory.set(gPortsOrch); vector pfc_wd_tables = { CFG_PFC_WD_TABLE_NAME diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index 3238ee795e..94723ea049 100755 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -284,9 +284,9 @@ static char* hostif_vlan_tag[] = { PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vector &tableNames, DBConnector *chassisAppDb) : Orch(db, tableNames), m_portStateTable(stateDb, STATE_PORT_TABLE_NAME), - port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true), - port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, true), - queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true) + port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false), + port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, false), + queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false) { SWSS_LOG_ENTER(); @@ -2273,19 +2273,21 @@ bool PortsOrch::initPort(const string &alias, const string &role, const int inde vector fields; fields.push_back(tuple); m_counterTable->set("", fields); + // Install a flex counter for this port to track stats - std::unordered_set counter_stats; - for (const auto& it: port_stat_ids) + auto flex_counters_orch = gDirectory.get(); + /* Delay installing the counters if they are yet enabled + If they are enabled, install the counters immediately */ + if (flex_counters_orch->getPortCountersState()) { - counter_stats.emplace(sai_serialize_port_stat(it)); + auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP); + port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_counter_stats); } - port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, counter_stats); - std::unordered_set port_buffer_drop_stats; - for (const auto& it: port_buffer_drop_stat_ids) + if (flex_counters_orch->getPortBufferDropCountersState()) { - port_buffer_drop_stats.emplace(sai_serialize_port_stat(it)); + auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP); + port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats); } - port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats); PortUpdate update = { p, true }; notify(SUBJECT_TYPE_PORT_CHANGE, static_cast(&update)); @@ -2318,8 +2320,11 @@ void PortsOrch::deInitPort(string alias, sai_object_id_t port_id) p.m_port_id = port_id; /* remove port from flex_counter_table for updating counters */ - port_stat_manager.clearCounterIdList(p.m_port_id); - + auto flex_counters_orch = gDirectory.get(); + if ((flex_counters_orch->getPortCountersState())) + { + port_stat_manager.clearCounterIdList(p.m_port_id); + } /* remove port name map from counter table */ m_counter_db->hdel(COUNTERS_PORT_NAME_MAP, alias); @@ -5072,6 +5077,42 @@ void PortsOrch::generatePriorityGroupMapPerPort(const Port& port) CounterCheckOrch::getInstance().addPort(port); } +void PortsOrch::generatePortCounterMap() +{ + if (m_isPortCounterMapGenerated) + { + return; + } + + auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP); + for (const auto& it: m_portList) + { + if (it.first == "CPU") + { + continue; + } + port_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_counter_stats); + } + + m_isPortCounterMapGenerated = true; +} + +void PortsOrch::generatePortBufferDropCounterMap() +{ + if (m_isPortBufferDropCounterMapGenerated) + { + return; + } + + auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP); + for (const auto& it: m_portList) + { + port_buffer_drop_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_buffer_drop_stats); + } + + m_isPortBufferDropCounterMapGenerated = true; +} + void PortsOrch::doTask(NotificationConsumer &consumer) { SWSS_LOG_ENTER(); @@ -6228,3 +6269,23 @@ void PortsOrch::voqSyncDelLagMember(Port &lag, Port &port) string key = lag.m_system_lag_info.alias + ":" + port.m_system_port_info.alias; m_tableVoqSystemLagMemberTable->del(key); } + +std::unordered_set PortsOrch::generateCounterStats(const string& type) +{ + std::unordered_set counter_stats; + if (type == PORT_STAT_COUNTER_FLEX_COUNTER_GROUP) + { + for (const auto& it: port_stat_ids) + { + counter_stats.emplace(sai_serialize_port_stat(it)); + } + } + else if (type == PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP) + { + for (const auto& it: port_buffer_drop_stat_ids) + { + counter_stats.emplace(sai_serialize_port_stat(it)); + } + } + return counter_stats; +} diff --git a/orchagent/portsorch.h b/orchagent/portsorch.h index 6a92b9d9dd..c29b59837f 100755 --- a/orchagent/portsorch.h +++ b/orchagent/portsorch.h @@ -13,6 +13,7 @@ #include "gearboxutils.h" #include "saihelper.h" #include "lagid.h" +#include "flexcounterorch.h" #define FCS_LEN 4 @@ -125,6 +126,8 @@ class PortsOrch : public Orch, public Subject void generateQueueMap(); void generatePriorityGroupMap(); + void generatePortCounterMap(); + void generatePortBufferDropCounterMap(); void refreshPortStatus(); bool removeAclTableGroup(const Port &p); @@ -290,6 +293,9 @@ class PortsOrch : public Orch, public Subject bool m_isPriorityGroupMapGenerated = false; void generatePriorityGroupMapPerPort(const Port& port); + bool m_isPortCounterMapGenerated = false; + bool m_isPortBufferDropCounterMapGenerated = false; + bool setPortAutoNeg(sai_object_id_t id, int an); bool setPortFecMode(sai_object_id_t id, int fec); bool setPortInterfaceType(sai_object_id_t id, sai_port_interface_type_t interface_type); @@ -333,6 +339,8 @@ class PortsOrch : public Orch, public Subject void voqSyncDelLagMember(Port &lag, Port &port); unique_ptr m_lagIdAllocator; + std::unordered_set generateCounterStats(const string& type); + }; #endif /* SWSS_PORTSORCH_H */ diff --git a/tests/mock_tests/mock_orchagent_main.h b/tests/mock_tests/mock_orchagent_main.h index 587055b87e..181ebac889 100644 --- a/tests/mock_tests/mock_orchagent_main.h +++ b/tests/mock_tests/mock_orchagent_main.h @@ -15,6 +15,8 @@ #include "vxlanorch.h" #include "policerorch.h" #include "fgnhgorch.h" +#include "flexcounterorch.h" +#include "directory.h" extern int gBatchSize; extern bool gSwssRecord; @@ -42,6 +44,7 @@ extern FdbOrch *gFdbOrch; extern MirrorOrch *gMirrorOrch; extern BufferOrch *gBufferOrch; extern VRFOrch *gVrfOrch; +extern Directory gDirectory; extern sai_acl_api_t *sai_acl_api; extern sai_switch_api_t *sai_switch_api; diff --git a/tests/mock_tests/portsorch_ut.cpp b/tests/mock_tests/portsorch_ut.cpp index 365c5144b5..77157aed19 100644 --- a/tests/mock_tests/portsorch_ut.cpp +++ b/tests/mock_tests/portsorch_ut.cpp @@ -146,8 +146,13 @@ namespace portsorch_test ASSERT_EQ(gPortsOrch, nullptr); - gPortsOrch = new PortsOrch(m_app_db.get(), m_state_db.get(), ports_tables, m_chassis_app_db.get()); + vector flex_counter_tables = { + CFG_FLEX_COUNTER_TABLE_NAME + }; + auto* flexCounterOrch = new FlexCounterOrch(m_config_db.get(), flex_counter_tables); + gDirectory.set(flexCounterOrch); + gPortsOrch = new PortsOrch(m_app_db.get(), m_state_db.get(), ports_tables, m_chassis_app_db.get()); vector buffer_tables = { APP_BUFFER_POOL_TABLE_NAME, APP_BUFFER_PROFILE_TABLE_NAME, APP_BUFFER_QUEUE_TABLE_NAME, diff --git a/tests/test_flex_counters.py b/tests/test_flex_counters.py new file mode 100644 index 0000000000..2089969cf8 --- /dev/null +++ b/tests/test_flex_counters.py @@ -0,0 +1,110 @@ +import time +import pytest + +# Counter keys on ConfigDB +PORT_KEY = "PORT" +QUEUE_KEY = "QUEUE" +RIF_KEY = "RIF" +BUFFER_POOL_WATERMARK_KEY = "BUFFER_POOL_WATERMARK" +PORT_BUFFER_DROP_KEY = "PORT_BUFFER_DROP" +PG_WATERMARK_KEY = "PG_WATERMARK" + +# Counter stats on FlexCountersDB +PORT_STAT = "PORT_STAT_COUNTER" +QUEUE_STAT = "QUEUE_STAT_COUNTER" +RIF_STAT = "RIF_STAT_COUNTER" +BUFFER_POOL_WATERMARK_STAT = "BUFFER_POOL_WATERMARK_STAT_COUNTER" +PORT_BUFFER_DROP_STAT = "PORT_BUFFER_DROP_STAT" +PG_WATERMARK_STAT = "PG_WATERMARK_STAT_COUNTER" + +# Counter maps on CountersDB +PORT_MAP = "COUNTERS_PORT_NAME_MAP" +QUEUE_MAP = "COUNTERS_QUEUE_NAME_MAP" +RIF_MAP = "COUNTERS_RIF_NAME_MAP" +BUFFER_POOL_WATERMARK_MAP = "COUNTERS_BUFFER_POOL_NAME_MAP" +PORT_BUFFER_DROP_MAP = "COUNTERS_PORT_NAME_MAP" +PG_WATERMARK_MAP = "COUNTERS_PG_NAME_MAP" + +NUMBER_OF_RETRIES = 10 +CPU_PORT_OID = "0x0" + +counter_type_dict = {"port_counter":[PORT_KEY, PORT_STAT, PORT_MAP], + "queue_counter":[QUEUE_KEY, QUEUE_STAT, QUEUE_MAP], + "rif_counter":[RIF_KEY, RIF_STAT, RIF_MAP], + "buffer_pool_watermark_counter":[BUFFER_POOL_WATERMARK_KEY, BUFFER_POOL_WATERMARK_STAT, BUFFER_POOL_WATERMARK_MAP], + "port_buffer_drop_counter":[PORT_BUFFER_DROP_KEY, PORT_BUFFER_DROP_STAT, PORT_BUFFER_DROP_MAP], + "pg_watermark_counter":[PG_WATERMARK_KEY, PG_WATERMARK_STAT, PG_WATERMARK_MAP]} + +class TestFlexCounters(object): + + def setup_dbs(self, dvs): + self.config_db = dvs.get_config_db() + self.flex_db = dvs.get_flex_db() + self.counters_db = dvs.get_counters_db() + + def wait_for_table(self, table): + for retry in range(NUMBER_OF_RETRIES): + counters_keys = self.counters_db.db_connection.hgetall(table) + if len(counters_keys) > 0: + return + else: + time.sleep(1) + + assert False, str(table) + " not created in Counters DB" + + def wait_for_id_list(self, stat, name, oid): + for retry in range(NUMBER_OF_RETRIES): + id_list = self.flex_db.db_connection.hgetall("FLEX_COUNTER_TABLE:" + stat + ":" + oid).items() + if len(id_list) > 0: + return + else: + time.sleep(1) + + assert False, "No ID list for counter " + str(name) + + def verify_no_flex_counters_tables(self, counter_stat): + counters_stat_keys = self.flex_db.get_keys("FLEX_COUNTER_TABLE:" + counter_stat) + assert len(counters_stat_keys) == 0, "FLEX_COUNTER_TABLE:" + str(counter_stat) + " tables exist before enabling the flex counter group" + + def verify_flex_counters_populated(self, map, stat): + counters_keys = self.counters_db.db_connection.hgetall(map) + for counter_entry in counters_keys.items(): + name = counter_entry[0] + oid = counter_entry[1] + self.wait_for_id_list(stat, name, oid) + + def verify_cpu_interface_not_in_db(self, stat): + cpu = self.flex_db.db_connection.hgetall("FLEX_COUNTER_TABLE:" + stat + ":" + CPU_PORT_OID) + assert cpu.size() == 0, "FLEX_COUNTER_TABLE:" + stat + ":" + CPU_PORT_OID + " - CPU port exist in DB" + + def enable_flex_counter_group(self, group, map): + group_stats_entry = {"FLEX_COUNTER_STATUS": "enable"} + self.config_db.create_entry("FLEX_COUNTER_TABLE", group, group_stats_entry) + self.wait_for_table(map) + + @pytest.mark.parametrize("counter_type", counter_type_dict.keys()) + def test_flex_counters(self, dvs, counter_type): + """ + The test will check there are no flex counters tables on FlexCounter DB when the counters are disabled. + After enabling each counter group, the test will check the flow of creating flex counters tables on FlexCounter DB. + For some counter types the MAPS on COUNTERS DB will be created as well after enabling the counter group, this will be also verified on this test. + """ + self.setup_dbs(dvs) + counter_key = counter_type_dict[counter_type][0] + counter_stat = counter_type_dict[counter_type][1] + counter_map = counter_type_dict[counter_type][2] + + self.verify_no_flex_counters_tables(counter_stat) + + if counter_type == "rif_counter": + self.config_db.db_connection.hset('INTERFACE|Ethernet0', "NULL", "NULL") + self.config_db.db_connection.hset('INTERFACE|Ethernet0|192.168.0.1/24', "NULL", "NULL") + + self.enable_flex_counter_group(counter_key, counter_map) + self.verify_flex_counters_populated(counter_map, counter_stat) + + if counter_type == "port_counter": + self.verify_cpu_interface_not_in_db(counter_stat) + + if counter_type == "rif_counter": + self.config_db.db_connection.hdel('INTERFACE|Ethernet0|192.168.0.1/24', "NULL") diff --git a/tests/test_pg_drop_counter.py b/tests/test_pg_drop_counter.py index dedee82c1a..1cdd834747 100644 --- a/tests/test_pg_drop_counter.py +++ b/tests/test_pg_drop_counter.py @@ -65,12 +65,14 @@ def set_up_flex_counter(self): fc_status_enable = {"FLEX_COUNTER_STATUS": "enable"} self.config_db.create_entry("FLEX_COUNTER_TABLE", "PG_DROP", fc_status_enable) + self.config_db.create_entry("FLEX_COUNTER_TABLE", "PG_WATERMARK", fc_status_enable) def clear_flex_counter(self): for pg in self.pgs: self.flex_db.delete_entry("FLEX_COUNTER_TABLE", "PG_DROP_STAT_COUNTER:{}".format(pg)) self.config_db.delete_entry("FLEX_COUNTER_TABLE", "PG_DROP") + self.config_db.delete_entry("FLEX_COUNTER_TABLE", "PG_WATERMARK") def test_pg_drop_counters(self, dvs):