Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[flex-counters] Delay flex counters stats init for faster boot time #1749

Merged
merged 3 commits into from
Jun 14, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 28 additions & 4 deletions orchagent/flexcounterorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ extern IntfsOrch *gIntfsOrch;
extern BufferOrch *gBufferOrch;

#define BUFFER_POOL_WATERMARK_KEY "BUFFER_POOL_WATERMARK"
#define PORT_KEY "PORT"
#define PORT_BUFFER_DROP_KEY "PORT_BUFFER_DROP"
#define QUEUE_KEY "QUEUE"
#define PG_WATERMARK_KEY "PG_WATERMARK"
#define RIF_KEY "RIF"

unordered_map<string, string> flexCounterGroupMap =
{
Expand Down Expand Up @@ -94,6 +99,16 @@ void FlexCounterOrch::doTask(Consumer &consumer)
}
else if(field == FLEX_COUNTER_STATUS_FIELD)
{
if(gPortsOrch && (key == PORT_KEY) && (value == "enable"))
shlomibitton marked this conversation as resolved.
Show resolved Hide resolved
{
gPortsOrch->generatePortCounterMap();
m_port_counter_enabled = true;
}
if(gPortsOrch && (key == PORT_BUFFER_DROP_KEY) && (value == "enable"))
{
gPortsOrch->generatePortBufferDropCounterMap();
m_port_buffer_drop_counter_enabled = true;
}
// Currently, the counters are disabled for polling by default
// The queue maps will be generated as soon as counters are enabled for polling
// Counter polling is enabled by pushing the COUNTER_ID_LIST/ATTR_ID_LIST, which contains
Expand All @@ -108,16 +123,15 @@ void FlexCounterOrch::doTask(Consumer &consumer)
// This can be because generateQueueMap() installs a fundamental list of queue stats
// that need to be polled. So my doubt here is if queue watermark stats shall be piggybacked
// into the same function as they may not be counted as fundamental
if(gPortsOrch)
if(gPortsOrch && (key == QUEUE_KEY) && (value == "enable"))
{
gPortsOrch->generateQueueMap();
gPortsOrch->generatePriorityGroupMap();
}
if(gPortsOrch)
if(gPortsOrch && (key == PG_WATERMARK_KEY) && (value == "enable"))
{
gPortsOrch->generatePriorityGroupMap();
}
if(gIntfsOrch)
if(gIntfsOrch && (key == RIF_KEY) && (value == "enable"))
{
gIntfsOrch->generateInterfaceMap();
}
Expand All @@ -144,3 +158,13 @@ void FlexCounterOrch::doTask(Consumer &consumer)
consumer.m_toSync.erase(it++);
}
}

bool FlexCounterOrch::getPortCountersState()
{
return m_port_counter_enabled;
}
shlomibitton marked this conversation as resolved.
Show resolved Hide resolved

bool FlexCounterOrch::getPortBufferDropCountersState()
{
return m_port_buffer_drop_counter_enabled;
}
4 changes: 4 additions & 0 deletions orchagent/flexcounterorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,14 @@ class FlexCounterOrch: public Orch
void doTask(Consumer &consumer);
FlexCounterOrch(swss::DBConnector *db, std::vector<std::string> &tableNames);
virtual ~FlexCounterOrch(void);
bool getPortCountersState();
shlomibitton marked this conversation as resolved.
Show resolved Hide resolved
bool getPortBufferDropCountersState();

private:
std::shared_ptr<swss::DBConnector> m_flexCounterDb = nullptr;
std::shared_ptr<swss::ProducerTable> m_flexCounterGroupTable = nullptr;
bool m_port_counter_enabled = false;
bool m_port_buffer_drop_counter_enabled = false;
};

#endif
6 changes: 5 additions & 1 deletion orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,11 @@ bool OrchDaemon::init()
CFG_FLEX_COUNTER_TABLE_NAME
};

m_orchList.push_back(new FlexCounterOrch(m_configDb, flex_counter_tables));
auto* flexCounterOrch = new FlexCounterOrch(m_configDb, flex_counter_tables);
m_orchList.push_back(flexCounterOrch);

gDirectory.set(flexCounterOrch);
gDirectory.set(gPortsOrch);

vector<string> pfc_wd_tables = {
CFG_PFC_WD_TABLE_NAME
Expand Down
87 changes: 74 additions & 13 deletions orchagent/portsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,9 +284,9 @@ static char* hostif_vlan_tag[] = {
PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vector<table_name_with_pri_t> &tableNames, DBConnector *chassisAppDb) :
Orch(db, tableNames),
m_portStateTable(stateDb, STATE_PORT_TABLE_NAME),
port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true),
port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, true),
queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true)
port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false),
port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_BUFFER_DROP_STAT_POLLING_INTERVAL_MS, false),
queue_stat_manager(QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false)
{
SWSS_LOG_ENTER();

Expand Down Expand Up @@ -2221,19 +2221,21 @@ bool PortsOrch::initPort(const string &alias, const int index, const set<int> &l
vector<FieldValueTuple> fields;
fields.push_back(tuple);
m_counterTable->set("", fields);

// Install a flex counter for this port to track stats
std::unordered_set<std::string> counter_stats;
for (const auto& it: port_stat_ids)
auto flex_counters_orch = gDirectory.get<FlexCounterOrch*>();
/* Delay installing the counters if they are yet enabled
Copy link
Contributor

@qiluo-msft qiluo-msft Aug 1, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Delay

How long do you delay? I cannot find the constant in code. How did you test the delay? #Closed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@qiluo-msft please check this file:
https://github.com/Azure/sonic-buildimage/blob/master/dockers/docker-orchagent/enable_counters.py#L55
it will enable the counters on CONFIG DB, we handle this change here:
https://github.com/Azure/sonic-swss/pull/1749/files#diff-77a2299548b8c0b1cfa565b28cdc46051dbb6c325d91011fab5fe9df283a03c8R112
and then update m_port_counter_enabled = true
If the counter is already enabled, the normal flow will take place like it used to be before.

If they are enabled, install the counters immediately */
if (flex_counters_orch->getPortCountersState())
{
counter_stats.emplace(sai_serialize_port_stat(it));
auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP);
port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_counter_stats);
}
port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, counter_stats);
std::unordered_set<std::string> port_buffer_drop_stats;
for (const auto& it: port_buffer_drop_stat_ids)
if (flex_counters_orch->getPortBufferDropCountersState())
{
port_buffer_drop_stats.emplace(sai_serialize_port_stat(it));
auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP);
port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats);
}
port_buffer_drop_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, port_buffer_drop_stats);

PortUpdate update = { p, true };
notify(SUBJECT_TYPE_PORT_CHANGE, static_cast<void *>(&update));
Expand Down Expand Up @@ -2266,8 +2268,11 @@ void PortsOrch::deInitPort(string alias, sai_object_id_t port_id)
p.m_port_id = port_id;

/* remove port from flex_counter_table for updating counters */
port_stat_manager.clearCounterIdList(p.m_port_id);

auto flex_counters_orch = gDirectory.get<FlexCounterOrch*>();
if ((flex_counters_orch->getPortCountersState()))
{
port_stat_manager.clearCounterIdList(p.m_port_id);
}
/* remove port name map from counter table */
m_counter_db->hdel(COUNTERS_PORT_NAME_MAP, alias);

Expand Down Expand Up @@ -4913,6 +4918,42 @@ void PortsOrch::generatePriorityGroupMapPerPort(const Port& port)
CounterCheckOrch::getInstance().addPort(port);
}

void PortsOrch::generatePortCounterMap()
{
if (m_isPortCounterMapGenerated)
{
return;
}

auto port_counter_stats = generateCounterStats(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP);
for (const auto& it: m_portList)
{
if (it.first == "CPU")
{
continue;
}
port_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_counter_stats);
}

m_isPortCounterMapGenerated = true;
}

void PortsOrch::generatePortBufferDropCounterMap()
{
if (m_isPortBufferDropCounterMapGenerated)
{
return;
}

auto port_buffer_drop_stats = generateCounterStats(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP);
for (const auto& it: m_portList)
{
port_buffer_drop_stat_manager.setCounterIdList(it.second.m_port_id, CounterType::PORT, port_buffer_drop_stats);
}

m_isPortBufferDropCounterMapGenerated = true;
}

void PortsOrch::doTask(NotificationConsumer &consumer)
{
SWSS_LOG_ENTER();
Expand Down Expand Up @@ -5982,3 +6023,23 @@ void PortsOrch::voqSyncDelLagMember(Port &lag, Port &port)
string key = lag.m_system_lag_info.alias + ":" + port.m_system_port_info.alias;
m_tableVoqSystemLagMemberTable->del(key);
}

std::unordered_set<std::string> PortsOrch::generateCounterStats(const string& type)
{
std::unordered_set<std::string> counter_stats;
if (type == PORT_STAT_COUNTER_FLEX_COUNTER_GROUP)
{
for (const auto& it: port_stat_ids)
{
counter_stats.emplace(sai_serialize_port_stat(it));
}
}
else if (type == PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP)
{
for (const auto& it: port_buffer_drop_stat_ids)
{
counter_stats.emplace(sai_serialize_port_stat(it));
}
}
return counter_stats;
}
8 changes: 8 additions & 0 deletions orchagent/portsorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "gearboxutils.h"
#include "saihelper.h"
#include "lagid.h"
#include "flexcounterorch.h"


#define FCS_LEN 4
Expand Down Expand Up @@ -125,6 +126,8 @@ class PortsOrch : public Orch, public Subject

void generateQueueMap();
void generatePriorityGroupMap();
void generatePortCounterMap();
void generatePortBufferDropCounterMap();

void refreshPortStatus();
bool removeAclTableGroup(const Port &p);
Expand Down Expand Up @@ -286,6 +289,9 @@ class PortsOrch : public Orch, public Subject
bool m_isPriorityGroupMapGenerated = false;
void generatePriorityGroupMapPerPort(const Port& port);

bool m_isPortCounterMapGenerated = false;
bool m_isPortBufferDropCounterMapGenerated = false;

bool setPortAutoNeg(sai_object_id_t id, int an);
bool setPortFecMode(sai_object_id_t id, int fec);
bool setPortInterfaceType(sai_object_id_t id, sai_port_interface_type_t interface_type);
Expand Down Expand Up @@ -328,6 +334,8 @@ class PortsOrch : public Orch, public Subject
void voqSyncDelLagMember(Port &lag, Port &port);
unique_ptr<LagIdAllocator> m_lagIdAllocator;

std::unordered_set<std::string> generateCounterStats(const string& type);

};
#endif /* SWSS_PORTSORCH_H */

3 changes: 3 additions & 0 deletions tests/mock_tests/mock_orchagent_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include "vxlanorch.h"
#include "policerorch.h"
#include "fgnhgorch.h"
#include "flexcounterorch.h"
#include "directory.h"

extern int gBatchSize;
extern bool gSwssRecord;
Expand Down Expand Up @@ -42,6 +44,7 @@ extern FdbOrch *gFdbOrch;
extern MirrorOrch *gMirrorOrch;
extern BufferOrch *gBufferOrch;
extern VRFOrch *gVrfOrch;
extern Directory<Orch*> gDirectory;

extern sai_acl_api_t *sai_acl_api;
extern sai_switch_api_t *sai_switch_api;
Expand Down
7 changes: 6 additions & 1 deletion tests/mock_tests/portsorch_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,13 @@ namespace portsorch_test

ASSERT_EQ(gPortsOrch, nullptr);

gPortsOrch = new PortsOrch(m_app_db.get(), m_state_db.get(), ports_tables, m_chassis_app_db.get());
vector<string> flex_counter_tables = {
CFG_FLEX_COUNTER_TABLE_NAME
};
auto* flexCounterOrch = new FlexCounterOrch(m_config_db.get(), flex_counter_tables);
gDirectory.set(flexCounterOrch);

gPortsOrch = new PortsOrch(m_app_db.get(), m_state_db.get(), ports_tables, m_chassis_app_db.get());
shlomibitton marked this conversation as resolved.
Show resolved Hide resolved
vector<string> buffer_tables = { APP_BUFFER_POOL_TABLE_NAME,
APP_BUFFER_PROFILE_TABLE_NAME,
APP_BUFFER_QUEUE_TABLE_NAME,
Expand Down
110 changes: 110 additions & 0 deletions tests/test_flex_counters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import time
import pytest

# Counter keys on ConfigDB
PORT_KEY = "PORT"
QUEUE_KEY = "QUEUE"
RIF_KEY = "RIF"
BUFFER_POOL_WATERMARK_KEY = "BUFFER_POOL_WATERMARK"
PORT_BUFFER_DROP_KEY = "PORT_BUFFER_DROP"
PG_WATERMARK_KEY = "PG_WATERMARK"

# Counter stats on FlexCountersDB
PORT_STAT = "PORT_STAT_COUNTER"
QUEUE_STAT = "QUEUE_STAT_COUNTER"
RIF_STAT = "RIF_STAT_COUNTER"
BUFFER_POOL_WATERMARK_STAT = "BUFFER_POOL_WATERMARK_STAT_COUNTER"
PORT_BUFFER_DROP_STAT = "PORT_BUFFER_DROP_STAT"
PG_WATERMARK_STAT = "PG_WATERMARK_STAT_COUNTER"

# Counter maps on CountersDB
PORT_MAP = "COUNTERS_PORT_NAME_MAP"
QUEUE_MAP = "COUNTERS_QUEUE_NAME_MAP"
RIF_MAP = "COUNTERS_RIF_NAME_MAP"
BUFFER_POOL_WATERMARK_MAP = "COUNTERS_BUFFER_POOL_NAME_MAP"
PORT_BUFFER_DROP_MAP = "COUNTERS_PORT_NAME_MAP"
PG_WATERMARK_MAP = "COUNTERS_PG_NAME_MAP"

NUMBER_OF_RETRIES = 10
CPU_PORT_OID = "0x0"

counter_type_dict = {"port_counter":[PORT_KEY, PORT_STAT, PORT_MAP],
"queue_counter":[QUEUE_KEY, QUEUE_STAT, QUEUE_MAP],
"rif_counter":[RIF_KEY, RIF_STAT, RIF_MAP],
"buffer_pool_watermark_counter":[BUFFER_POOL_WATERMARK_KEY, BUFFER_POOL_WATERMARK_STAT, BUFFER_POOL_WATERMARK_MAP],
"port_buffer_drop_counter":[PORT_BUFFER_DROP_KEY, PORT_BUFFER_DROP_STAT, PORT_BUFFER_DROP_MAP],
"pg_watermark_counter":[PG_WATERMARK_KEY, PG_WATERMARK_STAT, PG_WATERMARK_MAP]}

class TestFlexCounters(object):

def setup_dbs(self, dvs):
self.config_db = dvs.get_config_db()
self.flex_db = dvs.get_flex_db()
self.counters_db = dvs.get_counters_db()

def wait_for_table(self, table):
for retry in range(NUMBER_OF_RETRIES):
counters_keys = self.counters_db.db_connection.hgetall(table)
if len(counters_keys) > 0:
return
else:
time.sleep(1)

assert False, str(table) + " not created in Counters DB"

def wait_for_id_list(self, stat, name, oid):
for retry in range(NUMBER_OF_RETRIES):
id_list = self.flex_db.db_connection.hgetall("FLEX_COUNTER_TABLE:" + stat + ":" + oid).items()
if len(id_list) > 0:
return
else:
time.sleep(1)

assert False, "No ID list for counter " + str(name)

def verify_no_flex_counters_tables(self, counter_stat):
counters_stat_keys = self.flex_db.get_keys("FLEX_COUNTER_TABLE:" + counter_stat)
assert len(counters_stat_keys) == 0, "FLEX_COUNTER_TABLE:" + str(counter_stat) + " tables exist before enabling the flex counter group"

def verify_flex_counters_populated(self, map, stat):
counters_keys = self.counters_db.db_connection.hgetall(map)
for counter_entry in counters_keys.items():
name = counter_entry[0]
oid = counter_entry[1]
self.wait_for_id_list(stat, name, oid)

def verify_cpu_interface_not_in_db(self, stat):
cpu = self.flex_db.db_connection.hgetall("FLEX_COUNTER_TABLE:" + stat + ":" + CPU_PORT_OID)
assert cpu.size() == 0, "FLEX_COUNTER_TABLE:" + stat + ":" + CPU_PORT_OID + " - CPU port exist in DB"

def enable_flex_counter_group(self, group, map):
group_stats_entry = {"FLEX_COUNTER_STATUS": "enable"}
self.config_db.create_entry("FLEX_COUNTER_TABLE", group, group_stats_entry)
self.wait_for_table(map)

@pytest.mark.parametrize("counter_type", counter_type_dict.keys())
def test_flex_counters(self, dvs, counter_type):
"""
The test will check there are no flex counters tables on FlexCounter DB when the counters are disabled.
After enabling each counter group, the test will check the flow of creating flex counters tables on FlexCounter DB.
For some counter types the MAPS on COUNTERS DB will be created as well after enabling the counter group, this will be also verified on this test.
"""
self.setup_dbs(dvs)
counter_key = counter_type_dict[counter_type][0]
counter_stat = counter_type_dict[counter_type][1]
counter_map = counter_type_dict[counter_type][2]

self.verify_no_flex_counters_tables(counter_stat)

if counter_type == "rif_counter":
self.config_db.db_connection.hset('INTERFACE|Ethernet0', "NULL", "NULL")
self.config_db.db_connection.hset('INTERFACE|Ethernet0|192.168.0.1/24', "NULL", "NULL")

self.enable_flex_counter_group(counter_key, counter_map)
self.verify_flex_counters_populated(counter_map, counter_stat)

if counter_type == "port_counter":
self.verify_cpu_interface_not_in_db(counter_stat)

if counter_type == "rif_counter":
self.config_db.db_connection.hdel('INTERFACE|Ethernet0|192.168.0.1/24', "NULL")
Loading