Skip to content

Commit

Permalink
Collect ICMP packet loss information (#14)
Browse files Browse the repository at this point in the history
### Description of PR
Summary:
Fixes # (issue)

This PR is to collect ICMP packet loss information. 

sign-off: Jing Zhang zhangjing@microsoft.com

### Type of change
- [x] New feature

### Approach
#### What is the motivation for this PR?

When ICMP heartbeat loss happens, we want to know how long it lasts. We also want to collect the packet loss ratio information. 

#### How did you do it?

* Post link prober state change events and their time stamps to the state db metrics when the link prober enters or leaves the unknown state;
* Post the link prober packet loss ratio to the state db every 300 ms.

#### How did you verify/test it?

Tested on dual testbed.

Table entries created as expected: 
```
$ redis-cli -n 6 
127.0.0.1:6379[6]> KEYS *LINK_PROBE*
1) "LINK_PROBE_STATS|Ethernet48"
2) "LINK_PROBE_STATS|Ethernet84"
      ... ...
```

Before ICMP responder was turned on: 
```
~$ redis-cli -n 6 HGETALL "LINK_PROBE_STATS|Ethernet44" 
1) "pck_loss_count"
2) "24"
3) "pck_expected_count"
4) "24"
5) "link_prober_unknown_start"
6) "2022-Jan-26 02:59:58.138248"
```

After running a link_failure test case, link_prober_unknown_start and link_prober_unknown_end are updated: 
```
~$ redis-cli -n 6 HGETALL "LINK_PROBE_STATS|Ethernet44"
1) "pck_loss_count"
2) "612"
3) "pck_expected_count"
4) "840"
5) "link_prober_unknown_start"
6) "2022-Jan-26 03:13:05.366900"
7) "link_prober_unknown_end"
8) "2022-Jan-26 03:17:35.446580"
```

After resetting the packet loss counts: 
```
~$ redis-cli -n 4 HSET "MUX_CABLE|Ethernet44" pck_loss_data_reset reset 
(integer) 0

~$ redis-cli -n 6 HGETALL "LINK_PROBE_STATS|Ethernet44"
1) "pck_loss_count"
2) "0"
3) "pck_expected_count"
4) "0"
5) "link_prober_unknown_start"
6) "2022-Jan-26 03:13:05.366900"
7) "link_prober_unknown_end"
8) "2022-Jan-26 03:17:35.446580"

~$ redis-cli -n 6 HGETALL "LINK_PROBE_STATS|Ethernet44"
1) "pck_loss_count"
2) "0"
3) "pck_expected_count"
4) "6"
5) "link_prober_unknown_start"
6) "2022-Jan-26 03:13:05.366900"
7) "link_prober_unknown_end"
8) "2022-Jan-26 03:17:35.446580"
```
  • Loading branch information
zjswhhh authored Feb 1, 2022
1 parent 0c23756 commit bcd74b4
Show file tree
Hide file tree
Showing 18 changed files with 529 additions and 1 deletion.
131 changes: 131 additions & 0 deletions src/DbInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ constexpr auto DEFAULT_TIMEOUT_MSEC = 1000;
std::vector<std::string> DbInterface::mMuxState = {"active", "standby", "unknown", "Error"};
std::vector<std::string> DbInterface::mMuxLinkmgrState = {"uninitialized", "unhealthy", "healthy"};
std::vector<std::string> DbInterface::mMuxMetrics = {"start", "end"};
// state db field names for the link prober unknown-state time stamps; entries
// are looked up with a LinkProberMetrics value cast to int, so the order here
// has to follow that enum
std::vector<std::string> DbInterface::mLinkProbeMetrics = {"link_prober_unknown_start", "link_prober_unknown_end"};

//
// ---> DbInterface(mux::MuxManager *muxManager);
Expand Down Expand Up @@ -161,6 +162,62 @@ void DbInterface::postMetricsEvent(
)));
}

//
// ---> postLinkProberMetricsEvent(
// const std::string &portName,
// link_manager::LinkManagerStateMachine::LinkProberMetrics metrics
// );
//
// post link probe pck loss event to state db
void DbInterface::postLinkProberMetricsEvent(
const std::string &portName,
link_manager::LinkManagerStateMachine::LinkProberMetrics metrics
)
{
MUXLOGWARNING(boost::format("%s: posting link prober pck loss event %s") %
portName %
mLinkProbeMetrics[static_cast<int> (metrics)]
);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
&DbInterface::handlePostLinkProberMetrics,
this,
portName,
metrics,
boost::posix_time::microsec_clock::universal_time()
)));
}

//
// ---> postPckLossRatio(
// const std::string &portName,
// const uint64_t unknownEventCount,
// const uint64_t expectedPacketCount
// );
// post pck loss ratio update to state db
void DbInterface::postPckLossRatio(
const std::string &portName,
const uint64_t unknownEventCount,
const uint64_t expectedPacketCount
)
{
MUXLOGDEBUG(boost::format("%s: posting pck loss ratio, pck_loss_count / pck_expected_count : %d / %d") %
portName %
unknownEventCount %
expectedPacketCount
);

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
&DbInterface::handlePostPckLossRatio,
this,
portName,
unknownEventCount,
expectedPacketCount
)));
}

//
// ---> initialize();
//
Expand All @@ -184,6 +241,9 @@ void DbInterface::initialize()
mStateDbMuxMetricsTablePtr = std::make_shared<swss::Table> (
mStateDbPtr.get(), STATE_MUX_METRICS_TABLE_NAME
);
mStateDbLinkProbeStatsTablePtr = std::make_shared<swss::Table> (
mStateDbPtr.get(), LINK_PROBE_STATS_TABLE_NAME
);
mMuxStateTablePtr = std::make_shared<swss::Table> (mStateDbPtr.get(), STATE_MUX_CABLE_TABLE_NAME);

mSwssThreadPtr = std::make_shared<boost::thread> (&DbInterface::handleSwssNotification, this);
Expand Down Expand Up @@ -319,6 +379,57 @@ void DbInterface::handlePostMuxMetrics(
);
}

//
// ---> handlePostLinkProberMetrics(
//          const std::string portName,
//          link_manager::LinkManagerStateMachine::LinkProberMetrics metrics,
//          boost::posix_time::ptime time
//      );
//
// write the time stamp of a link prober unknown start/end event to the
// link probe stats table in state db
//
void DbInterface::handlePostLinkProberMetrics(
    const std::string portName,
    link_manager::LinkManagerStateMachine::LinkProberMetrics metrics,
    boost::posix_time::ptime time
)
{
    // field that belongs to this event; the enum value is the index into the name list
    const std::string &eventField = mLinkProbeMetrics[static_cast<int> (metrics)];

    MUXLOGWARNING(boost::format("%s: posting link prober pck loss event %s") % portName % eventField);

    const bool isUnknownStart =
        (metrics == link_manager::LinkManagerStateMachine::LinkProberMetrics::LinkProberUnknownStart);
    if (isUnknownStart) {
        // a start event removes both stored fields before the new start time is written
        mStateDbLinkProbeStatsTablePtr->hdel(portName, mLinkProbeMetrics[0]);
        mStateDbLinkProbeStatsTablePtr->hdel(portName, mLinkProbeMetrics[1]);
    }

    mStateDbLinkProbeStatsTablePtr->hset(
        portName,
        eventField,
        boost::posix_time::to_simple_string(time)
    );
}

//
// ---> handlePostPckLossRatio(
//          const std::string portName,
//          const uint64_t unknownEventCount,
//          const uint64_t expectedPacketCount
//      );
//
// write both pck loss counters of a port to the link probe stats table
//
void DbInterface::handlePostPckLossRatio(
    const std::string portName,
    const uint64_t unknownEventCount,
    const uint64_t expectedPacketCount
)
{
    MUXLOGDEBUG(
        boost::format("%s: posting pck loss ratio, pck_loss_count / pck_expected_count : %d / %d")
        % portName
        % unknownEventCount
        % expectedPacketCount
    );

    // the counters are stored as decimal strings under two separate fields
    const std::string lossCountValue = std::to_string(unknownEventCount);
    mStateDbLinkProbeStatsTablePtr->hset(portName, "pck_loss_count", lossCountValue);

    const std::string expectedCountValue = std::to_string(expectedPacketCount);
    mStateDbLinkProbeStatsTablePtr->hset(portName, "pck_expected_count", expectedCountValue);
}

//
// ---> processTorMacAddress(std::string& mac);
//
Expand Down Expand Up @@ -500,8 +611,28 @@ void DbInterface::processMuxPortConfigNotifiction(std::deque<swss::KeyOpFieldsVa
f %
v
);

mMuxManagerPtr->updateMuxPortConfig(port, v);
}

std::vector<swss::FieldValueTuple>::const_iterator c_it = std::find_if(
fieldValues.cbegin(),
fieldValues.cend(),
[] (const swss::FieldValueTuple &fv) {return fvField(fv) == "pck_loss_data_reset";}
);
if (c_it != fieldValues.cend()) {
const std::string f = c_it->first;
const std::string v = c_it->second;

MUXLOGDEBUG(boost::format("key: %s, Operation: %s, f: %s, v: %s") %
port %
operation %
f %
v
);

mMuxManagerPtr->resetPckLossCount(port);
}
}
}

Expand Down
72 changes: 72 additions & 0 deletions src/DbInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class MuxManagerTest;

namespace mux
{
// name of the table that DbInterface writes the per-port link probe
// statistics (pck loss counters, unknown start/end time stamps) to
#define LINK_PROBE_STATS_TABLE_NAME "LINK_PROBE_STATS"

class MuxManager;
using ServerIpPortMap = std::map<boost::asio::ip::address, std::string>;

Expand Down Expand Up @@ -170,6 +172,39 @@ class DbInterface
mux_state::MuxState::Label label
);

/**
* @method postLinkProberMetricsEvent
*
* @brief log a link prober unknown start/end event and queue its write to
*        state db; the event time stamp is taken when this method is called
*
* @param portName (in) port name, used as the key of the table entry
* @param metrics (in) event to record (link prober unknown start or end)
*
* @return none
*
*/
virtual void postLinkProberMetricsEvent(
const std::string &portName,
link_manager::LinkManagerStateMachine::LinkProberMetrics metrics
);

/**
* @method postPckLossRatio
*
* @brief log the pck loss counters of a port and queue their write to state db
*
* @param portName (in) port name, used as the key of the table entry
* @param unknownEventCount (in) count of missing icmp packets, stored as "pck_loss_count"
* @param expectedPacketCount (in) count of expected icmp packets, stored as "pck_expected_count"
*
* @return none
*/
virtual void postPckLossRatio(
const std::string &portName,
const uint64_t unknownEventCount,
const uint64_t expectedPacketCount
);

/**
*@method initialize
*
Expand Down Expand Up @@ -277,6 +312,40 @@ class DbInterface
boost::posix_time::ptime time
);

/**
* @method handlePostLinkProberMetrics
*
* @brief write the time stamp of a link prober unknown start/end event to
*        state db; a start event first removes the stored start and end fields
*
* @param portName (in) port name, used as the key of the table entry
* @param metrics (in) event being recorded, selects the field to write
* @param time (in) event time stamp, stored as a simple string
*
* @return none
*/
void handlePostLinkProberMetrics(
const std::string portName,
link_manager::LinkManagerStateMachine::LinkProberMetrics metrics,
boost::posix_time::ptime time
);

/**
* @method handlePostPckLossRatio
*
* @brief write the pck loss counters of a port to state db as the fields
*        "pck_loss_count" and "pck_expected_count"
*
* @param portName (in) port name, used as the key of the table entry
* @param unknownEventCount (in) count of missing icmp packets
* @param expectedPacketCount (in) count of expected icmp packets
*
* @return none
*/
void handlePostPckLossRatio(
const std::string portName,
const uint64_t unknownEventCount,
const uint64_t expectedPacketCount
);

/**
*@method processTorMacAddress
*
Expand Down Expand Up @@ -488,6 +557,7 @@ class DbInterface
static std::vector<std::string> mMuxState;
static std::vector<std::string> mMuxLinkmgrState;
static std::vector<std::string> mMuxMetrics;
static std::vector<std::string> mLinkProbeMetrics;

private:
mux::MuxManager *mMuxManagerPtr;
Expand All @@ -505,6 +575,8 @@ class DbInterface
std::shared_ptr<swss::Table> mStateDbMuxLinkmgrTablePtr;
// for writing mux metrics
std::shared_ptr<swss::Table> mStateDbMuxMetricsTablePtr;
// for writing link probe statistics data
std::shared_ptr<swss::Table> mStateDbLinkProbeStatsTablePtr;

std::shared_ptr<boost::thread> mSwssThreadPtr;

Expand Down
15 changes: 15 additions & 0 deletions src/MuxManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,21 @@ void MuxManager::updateMuxPortConfig(const std::string &portName, const std::str
}
}

//
// ---> resetPckLossCount(const std::string &portName);
//
// reset ICMP packet loss count
//
void MuxManager::resetPckLossCount(const std::string &portName)
{
MUXLOGWARNING(boost::format("%s: reset ICMP packet loss count ") % portName);

PortMapIterator portMapIterator = mPortMap.find(portName);
if(portMapIterator != mPortMap.end()) {
portMapIterator->second->resetPckLossCount();
}
}

//
// ---> addOrUpdateMuxPortLinkState(const std::string &portName, const std::string &linkState);
//
Expand Down
11 changes: 11 additions & 0 deletions src/MuxManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,17 @@ class MuxManager
*/
void updateMuxPortConfig(const std::string &portName, const std::string &linkState);

/**
* @method resetPckLossCount
*
* @brief reset ICMP packet loss count of a port; a port name that is not
*        in the port map is ignored.
*
* @param portName (in) Mux port name
*
* @return none
*/
void resetPckLossCount(const std::string &portName);

/**
*@method addOrUpdateMuxPortLinkState
*
Expand Down
17 changes: 17 additions & 0 deletions src/MuxPort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,4 +244,21 @@ void MuxPort::handleDefaultRouteState(const std::string &routeState)
routeState
)));
}

//
// ---> resetPckLossCount();
//
// reset ICMP packet loss count
//
void MuxPort::resetPckLossCount()
{
MUXLOGDEBUG(boost::format("port: %s, reset ICMP packet loss counts ") % mMuxPortConfig.getPortName());

boost::asio::io_service &ioService = mStrand.context();
ioService.post(mStrand.wrap(boost::bind(
&link_manager::LinkManagerStateMachine::handleResetLinkProberPckLossCount,
&mLinkManagerStateMachine
)));
}

} /* namespace mux */
36 changes: 36 additions & 0 deletions src/MuxPort.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,33 @@ class MuxPort: public std::enable_shared_from_this<MuxPort>
mDbInterfacePtr->postMetricsEvent(mMuxPortConfig.getPortName(), metrics, label);
};

/**
 * @method postLinkProberMetricsEvent
 *
 * @brief forward a link prober unknown start/end event of this port to the
 *        db interface
 *
 * @param metrics (in) event to post
 *
 * @return none
 */
inline void postLinkProberMetricsEvent(link_manager::LinkManagerStateMachine::LinkProberMetrics metrics)
{
    // the db interface expects the name of the port the event belongs to
    const auto &portName = mMuxPortConfig.getPortName();
    mDbInterfacePtr->postLinkProberMetricsEvent(portName, metrics);
}

/**
 * @method postPckLossRatio
 *
 * @brief forward the pck loss counters of this port to the db interface
 *
 * @param unknownEventCount (in) count of missing icmp packets
 * @param expectedPacketCount (in) count of expected icmp packets
 *
 * @return none
 */
inline void postPckLossRatio(const uint64_t unknownEventCount, const uint64_t expectedPacketCount)
{
    // the db interface expects the name of the port the counters belong to
    const auto &portName = mMuxPortConfig.getPortName();
    mDbInterfacePtr->postPckLossRatio(portName, unknownEventCount, expectedPacketCount);
}

/**
*@method setServerIpv4Address
*
Expand Down Expand Up @@ -263,6 +290,15 @@ class MuxPort: public std::enable_shared_from_this<MuxPort>
*/
void handleDefaultRouteState(const std::string &routeState);

/**
* @method resetPckLossCount
*
* @brief reset ICMP packet loss count; the reset is posted to the link
*        manager state machine through this port's strand
*
* @return none
*/
void resetPckLossCount();

protected:
friend class test::MuxManagerTest;
friend class test::FakeMuxPort;
Expand Down
Loading

0 comments on commit bcd74b4

Please sign in to comment.