Skip to content

Commit

Permalink
feat: add CRC error monitoring to net_monitor
Browse files Browse the repository at this point in the history
Signed-off-by: noriyuki.h <n-hamaike@esol.co.jp>
Signed-off-by: v-nakayama7440-esol <v-nakayama7440@esol.co.jp>
  • Loading branch information
n-hamaike-esol authored and v-nakayama7440-esol committed Apr 6, 2022
1 parent b1e10da commit bf82519
Show file tree
Hide file tree
Showing 7 changed files with 327 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
devices: ["*"]
traffic_reader_port: 7636
monitor_program: "greengrass"
crc_error_check_duration: 1
crc_error_count_threshold: 1
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@
contains: [": Network Traffic"]
timeout: 3.0

netowork_crc_error:
type: diagnostic_aggregator/GenericAnalyzer
path: network_crc_error
contains: [": Network CRC Error"]
timeout: 3.0

storage:
type: diagnostic_aggregator/AnalyzerGroup
path: storage
Expand Down
2 changes: 2 additions & 0 deletions system/system_monitor/config/net_monitor.param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
devices: ["*"]
traffic_reader_port: 7636
monitor_program: "greengrass"
crc_error_check_duration: 1
crc_error_count_threshold: 1
10 changes: 6 additions & 4 deletions system/system_monitor/docs/ros_parameters.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@ mem_monitor:

net_monitor:

| Name | Type | Unit | Default | Notes |
| :--------- | :----------: | :-----: | :-----: | :----------------------------------------------------------------------------------- |
| devices | list[string] | n/a | none | The name of network interface to monitor. (e.g. eth0, \* for all network interfaces) |
| usage_warn | float | %(1e-2) | 0.95 | Generates warning when network usage reaches a specified value or higher. |
| Name | Type | Unit | Default | Notes |
| :------------------------ | :----------: | :-----: | :-----: | :-------------------------------------------------------------------------------------------------------------- |
| devices | list[string] | n/a | none | The name of network interface to monitor. (e.g. eth0, \* for all network interfaces) |
| usage_warn | float | %(1e-2) | 0.95 | Generates warning when network usage reaches a specified value or higher. |
| crc_error_check_duration | int | sec | 1 | CRC error check duration. |
| crc_error_count_threshold | int | n/a | 1 | Generates warning when count of CRC errors during CRC error check duration reaches a specified value or higher. |

## <u>NTP Monitor</u>

Expand Down
18 changes: 18 additions & 0 deletions system/system_monitor/docs/topics_net_monitor.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,21 @@
| key | value (example) |
| ----- | ----------------------------------------------------- |
| error | [nethogs -t] execve failed: No such file or directory |

## <u>Network CRC Error</u>

/diagnostics/net_monitor: Network CRC Error

<b>[summary]</b>

| level | message |
| ----- | ------- |
| OK | OK |

<b>[values]</b>

| key | value (example) |
| ------------------------------------------ | --------------- |
| Network [0-9]: interface name | wlp82s0 |
| Network [0-9]: total rx_crc_errors | 0 |
| Network [0-9]: rx_crc_errors per unit time | 0 |
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <diagnostic_updater/diagnostic_updater.hpp>

#include <climits>
#include <deque>
#include <map>
#include <string>
#include <vector>
Expand Down Expand Up @@ -55,11 +56,6 @@ class NetMonitor : public rclcpp::Node
*/
~NetMonitor();

/**
* @brief Update the diagnostic state.
*/
void update();

/**
* @brief Shutdown nl80211 object
*/
Expand All @@ -86,24 +82,120 @@ class NetMonitor : public rclcpp::Node
void monitorTraffic(
diagnostic_updater::DiagnosticStatusWrapper & stat); // NOLINT(runtime/references)

/**
* @brief check CRC error
* @param [out] stat diagnostic message passed directly to diagnostic publish calls
* @note NOLINT syntax is needed since diagnostic_updater asks for a non-const reference
* to pass diagnostic message updated in this function to diagnostic publish calls.
*/
void checkCrcError(
diagnostic_updater::DiagnosticStatusWrapper & stat); // NOLINT(runtime/references)

/**
* @brief get wireless speed
* @param [in] ifa_name interface name
* @return wireless speed
*/
float getWirelessSpeed(const char * ifa_name);

/**
* @brief timer callback
*/
void onTimer();

/**
* @brief update Network information list
*/
void updateNetworkInfoList();

/**
* @brief check NetMonitor General Infomation
* @param [out] stat diagnostic message passed directly to diagnostic publish calls
* @return check result
*/
bool checkGeneralInfo(diagnostic_updater::DiagnosticStatusWrapper & stat);

/**
* @brief Network information
*/
struct NetworkInfo
{
int mtu_errno; //!< @brief errno set by ioctl() with SIOCGIFMTU
int ethtool_errno; //!< @brief errno set by ioctl() with SIOCETHTOOL
bool is_running; //!< @brief resource allocated flag
std::string interface_name; //!< @brief interface name
float speed; //!< @brief network capacity
int mtu; //!< @brief MTU
float rx_traffic; //!< @brief traffic received
float tx_traffic; //!< @brief traffic transmitted
float rx_usage; //!< @brief network capacity usage rate received
float tx_usage; //!< @brief network capacity usage rate transmitted
unsigned int rx_bytes; //!< @brief total bytes received
unsigned int rx_errors; //!< @brief bad packets received
unsigned int tx_bytes; //!< @brief total bytes transmitted
unsigned int tx_errors; //!< @brief packet transmit problems
unsigned int collisions; //!< @brief number of collisions during packet transmissions

NetworkInfo()
: mtu_errno(0),
ethtool_errno(0),
is_running(false),
interface_name(""),
speed(0.0),
mtu(0),
rx_traffic(0.0),
tx_traffic(0.0),
rx_usage(0.0),
tx_usage(0.0),
rx_bytes(0),
rx_errors(0),
tx_bytes(0),
tx_errors(0),
collisions(0)
{
}
};

/**
* @brief determine if it is a supported network
* @param [in] net_info network infomation
* @param [in] index index of network infomation index
* @param [out] stat diagnostic message passed directly to diagnostic publish calls
* @param [out] error_str error string
* @return result of determining whether it is a supported network
*/
bool isSupportedNetwork(
const NetworkInfo & net_info, int index, diagnostic_updater::DiagnosticStatusWrapper & stat,
std::string & error_str);

diagnostic_updater::Updater updater_; //!< @brief Updater class which advertises to /diagnostics
rclcpp::TimerBase::SharedPtr timer_; //!< @brief timer to get Network information

char hostname_[HOST_NAME_MAX + 1]; //!< @brief host name
std::map<std::string, bytes> bytes_; //!< @brief list of bytes
rclcpp::Time last_update_time_; //!< @brief last update time
std::vector<std::string> device_params_; //!< @brief list of devices
NL80211 nl80211_; // !< @brief 802.11 netlink-based interface
NL80211 nl80211_; //!< @brief 802.11 netlink-based interface
int getifaddrs_errno_; //!< @brief errno set by getifaddrs()
std::vector<NetworkInfo> net_info_list_; //!< @brief list of Network information

std::string monitor_program_; //!< @brief nethogs monitor program name
bool nethogs_all_; //!< @brief nethogs result all mode
int traffic_reader_port_; //!< @brief port number to connect to traffic_reader
/**
* @brief CRC errors information
*/
typedef struct crc_errors
{
std::deque<unsigned int> errors_queue; //!< @brief queue that holds count of CRC errors
unsigned int last_rx_crc_errors; //!< @brief rx_crc_error at the time of the last monitoring

crc_errors() : last_rx_crc_errors(0) {}
} crc_errors;
std::map<std::string, crc_errors> crc_errors_; //!< @brief list of CRC errors

std::string monitor_program_; //!< @brief nethogs monitor program name
bool nethogs_all_; //!< @brief nethogs result all mode
int traffic_reader_port_; //!< @brief port number to connect to traffic_reader
unsigned int crc_error_check_duration_; //!< @brief CRC error check duration
unsigned int crc_error_count_threshold_; //!< @brief CRC error count threshold

/**
* @brief Network usage status messages
Expand Down
Loading

0 comments on commit bf82519

Please sign in to comment.