Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[synchronous mode] Add failure notification for SAI failures in synchronous mode #1596

Merged
merged 8 commits into from
Feb 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions cfgmgr/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ INCLUDES = -I$(top_srcdir)/lib -I $(top_srcdir) -I $(top_srcdir)/orchagent -I $(
CFLAGS_SAI = -I /usr/include/sai
LIBNL_CFLAGS = -I/usr/include/libnl3
LIBNL_LIBS = -lnl-genl-3 -lnl-route-3 -lnl-3
SAIMETA_LIBS = -lsaimeta -lsaimetadata

bin_PROGRAMS = vlanmgrd teammgrd portmgrd intfmgrd buffermgrd vrfmgrd nbrmgrd vxlanmgrd sflowmgrd natmgrd coppmgrd tunnelmgrd macsecmgrd

Expand All @@ -24,64 +25,64 @@ endif
vlanmgrd_SOURCES = vlanmgrd.cpp vlanmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
vlanmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
vlanmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
vlanmgrd_LDADD = -lswsscommon
vlanmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

teammgrd_SOURCES = teammgrd.cpp teammgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
teammgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
teammgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
teammgrd_LDADD = -lswsscommon
teammgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

portmgrd_SOURCES = portmgrd.cpp portmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
portmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
portmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
portmgrd_LDADD = -lswsscommon
portmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

intfmgrd_SOURCES = intfmgrd.cpp intfmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
intfmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
intfmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
intfmgrd_LDADD = -lswsscommon
intfmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

buffermgrd_SOURCES = buffermgrd.cpp buffermgr.cpp buffermgrdyn.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
buffermgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
buffermgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
buffermgrd_LDADD = -lswsscommon
buffermgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

vrfmgrd_SOURCES = vrfmgrd.cpp vrfmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
vrfmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
vrfmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
vrfmgrd_LDADD = -lswsscommon
vrfmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

nbrmgrd_SOURCES = nbrmgrd.cpp nbrmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
nbrmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI) $(LIBNL_CFLAGS)
nbrmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI) $(LIBNL_CPPFLAGS)
nbrmgrd_LDADD = -lswsscommon $(LIBNL_LIBS)
nbrmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS) $(LIBNL_LIBS)

vxlanmgrd_SOURCES = vxlanmgrd.cpp vxlanmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
vxlanmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
vxlanmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
vxlanmgrd_LDADD = -lswsscommon
vxlanmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

sflowmgrd_SOURCES = sflowmgrd.cpp sflowmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
sflowmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
sflowmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
sflowmgrd_LDADD = -lswsscommon
sflowmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

natmgrd_SOURCES = natmgrd.cpp natmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
natmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
natmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
natmgrd_LDADD = -lswsscommon
natmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

coppmgrd_SOURCES = coppmgrd.cpp coppmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
coppmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
coppmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
coppmgrd_LDADD = -lswsscommon
coppmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

tunnelmgrd_SOURCES = tunnelmgrd.cpp tunnelmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
tunnelmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
tunnelmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
tunnelmgrd_LDADD = -lswsscommon
tunnelmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)

macsecmgrd_SOURCES = macsecmgrd.cpp macsecmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
macsecmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
macsecmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
macsecmgrd_LDADD = -lswsscommon
macsecmgrd_LDADD = -lswsscommon $(SAIMETA_LIBS)
12 changes: 6 additions & 6 deletions orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ bool NeighOrch::addNextHop(const IpAddress &ipAddress, const string &alias)
{
SWSS_LOG_ERROR("Failed to create next hop %s on %s, rv:%d",
ipAddress.to_string().c_str(), alias.c_str(), status);
return false;
return handleSaiCreateStatus(SAI_API_NEXT_HOP, status);
}

SWSS_LOG_NOTICE("Created next hop %s on %s",
Expand Down Expand Up @@ -678,7 +678,7 @@ bool NeighOrch::addNeighbor(const NeighborEntry &neighborEntry, const MacAddress
{
SWSS_LOG_ERROR("Failed to create neighbor %s on %s, rv:%d",
macAddress.to_string().c_str(), alias.c_str(), status);
return false;
return handleSaiCreateStatus(SAI_API_NEIGHBOR, status);
}
}
SWSS_LOG_NOTICE("Created neighbor ip %s, %s on %s", ip_address.to_string().c_str(),
Expand All @@ -701,7 +701,7 @@ bool NeighOrch::addNeighbor(const NeighborEntry &neighborEntry, const MacAddress
{
SWSS_LOG_ERROR("Failed to remove neighbor %s on %s, rv:%d",
macAddress.to_string().c_str(), alias.c_str(), status);
return false;
return handleSaiRemoveStatus(SAI_API_NEIGHBOR, status);
}
m_intfsOrch->decreaseRouterIntfsRefCount(alias);

Expand All @@ -725,7 +725,7 @@ bool NeighOrch::addNeighbor(const NeighborEntry &neighborEntry, const MacAddress
{
SWSS_LOG_ERROR("Failed to update neighbor %s on %s, rv:%d",
macAddress.to_string().c_str(), alias.c_str(), status);
return false;
return handleSaiSetStatus(SAI_API_NEIGHBOR, status);
}
SWSS_LOG_NOTICE("Updated neighbor %s on %s", macAddress.to_string().c_str(), alias.c_str());
}
Expand Down Expand Up @@ -798,7 +798,7 @@ bool NeighOrch::removeNeighbor(const NeighborEntry &neighborEntry, bool disable)
{
SWSS_LOG_ERROR("Failed to remove next hop %s on %s, rv:%d",
ip_address.to_string().c_str(), alias.c_str(), status);
return false;
return handleSaiRemoveStatus(SAI_API_NEXT_HOP, status);
}
}

Expand Down Expand Up @@ -830,7 +830,7 @@ bool NeighOrch::removeNeighbor(const NeighborEntry &neighborEntry, bool disable)
{
SWSS_LOG_ERROR("Failed to remove neighbor %s on %s, rv:%d",
m_syncdNeighbors[neighborEntry].mac.to_string().c_str(), alias.c_str(), status);
return false;
return handleSaiRemoveStatus(SAI_API_NEIGHBOR, status);
}
}

Expand Down
77 changes: 77 additions & 0 deletions orchagent/orch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "tokenize.h"
#include "logger.h"
#include "consumerstatetable.h"
#include "sai_serialize.h"

using namespace swss;

Expand Down Expand Up @@ -685,6 +686,82 @@ Executor *Orch::getExecutor(string executorName)
return NULL;
}

bool Orch::handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context)
{
/*
* This function aims to provide coarse handling of failures in sairedis create
* operation (i.e., notify users by throwing excepions when failures happen).
* Return value: true - Handled the status successfully. No need to retry this SAI operation.
* false - Cannot handle the status. Need to retry the SAI operation.
* TODO: 1. Add general handling logic for specific statuses (e.g., SAI_STATUS_ITEM_ALREADY_EXISTS)
* 2. Develop fine-grain failure handling mechanisms and replace this coarse handling
* in each orch.
* 3. Take the type of sai api into consideration.
*/
switch (status)
{
case SAI_STATUS_SUCCESS:
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus");
return true;
default:
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
exit(EXIT_FAILURE);
}
return false;
shi-su marked this conversation as resolved.
Show resolved Hide resolved
}

bool Orch::handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context)
{
/*
* This function aims to provide coarse handling of failures in sairedis set
* operation (i.e., notify users by throwing excepions when failures happen).
* Return value: true - Handled the status successfully. No need to retry this SAI operation.
* false - Cannot handle the status. Need to retry the SAI operation.
* TODO: 1. Add general handling logic for specific statuses
* 2. Develop fine-grain failure handling mechanisms and replace this coarse handling
* in each orch.
* 3. Take the type of sai api into consideration.
*/
switch (status)
{
case SAI_STATUS_SUCCESS:
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiSetStatus");
return true;
default:
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
exit(EXIT_FAILURE);
}
return false;
}

bool Orch::handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context)
{
/*
* This function aims to provide coarse handling of failures in sairedis remove
* operation (i.e., notify users by throwing excepions when failures happen).
* Return value: true - Handled the status successfully. No need to retry this SAI operation.
* false - Cannot handle the status. Need to retry the SAI operation.
* TODO: 1. Add general handling logic for specific statuses (e.g., SAI_STATUS_OBJECT_IN_USE,
* SAI_STATUS_ITEM_NOT_FOUND)
* 2. Develop fine-grain failure handling mechanisms and replace this coarse handling
* in each orch.
* 3. Take the type of sai api into consideration.
*/
switch (status)
{
case SAI_STATUS_SUCCESS:
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus");
return true;
default:
SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
exit(EXIT_FAILURE);
}
return false;
}

void Orch2::doTask(Consumer &consumer)
{
SWSS_LOG_ENTER();
Expand Down
5 changes: 5 additions & 0 deletions orchagent/orch.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,11 @@ class Orch
/* Note: consumer will be owned by this class */
void addExecutor(Executor* executor);
Executor *getExecutor(std::string executorName);

/* Handling SAI status*/
virtual bool handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual bool handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual bool handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
private:
void removeMeFromObjsReferencedByMe(type_map &type_maps, const std::string &table, const std::string &obj_name, const std::string &field, const std::string &old_referenced_obj_name);
void addConsumer(swss::DBConnector *db, std::string tableName, int pri = default_orch_pri);
Expand Down
29 changes: 15 additions & 14 deletions orchagent/routeorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ bool RouteOrch::validnexthopinNextHopGroup(const NextHopKey &nexthop, uint32_t&
{
SWSS_LOG_ERROR("Failed to add next hop member to group %" PRIx64 ": %d\n",
nhopgroup->second.next_hop_group_id, status);
return false;
return handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, status);
}

++count;
Expand Down Expand Up @@ -371,7 +371,7 @@ bool RouteOrch::invalidnexthopinNextHopGroup(const NextHopKey &nexthop, uint32_t
{
SWSS_LOG_ERROR("Failed to remove next hop member %" PRIx64 " from group %" PRIx64 ": %d\n",
nexthop_id, nhopgroup->second.next_hop_group_id, status);
return false;
return handleSaiRemoveStatus(SAI_API_NEXT_HOP_GROUP, status);
}

++count;
Expand Down Expand Up @@ -950,7 +950,7 @@ bool RouteOrch::createFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to create next hop group rv:%d", status);
return false;
return handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, status);
}

gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP);
Expand All @@ -968,7 +968,7 @@ bool RouteOrch::removeFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id
{
SWSS_LOG_ERROR("Failed to remove next hop group %" PRIx64 ", rv:%d",
next_hop_group_id, status);
return false;
return handleSaiRemoveStatus(SAI_API_NEXT_HOP_GROUP, status);
}

gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP);
Expand Down Expand Up @@ -1033,7 +1033,7 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops)
{
SWSS_LOG_ERROR("Failed to create next hop group %s, rv:%d",
nexthops.to_string().c_str(), status);
return false;
return handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, status);
}

m_nextHopGroupCount ++;
Expand Down Expand Up @@ -1150,7 +1150,7 @@ bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops)
{
SWSS_LOG_ERROR("Failed to remove next hop group member[%zu] %" PRIx64 ", rv:%d",
i, next_hop_ids[i], statuses[i]);
return false;
return handleSaiRemoveStatus(SAI_API_NEXT_HOP_GROUP, statuses[i]);
}

gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER);
Expand All @@ -1160,7 +1160,7 @@ bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to remove next hop group %" PRIx64 ", rv:%d", next_hop_group_id, status);
return false;
return handleSaiRemoveStatus(SAI_API_NEXT_HOP_GROUP, status);
}

m_nextHopGroupCount --;
Expand Down Expand Up @@ -1626,7 +1626,8 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey
}
else if (it_route == m_syncdRoutes.at(vrf_id).end())
{
if (*it_status++ != SAI_STATUS_SUCCESS)
sai_status_t status = *it_status++;
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to create route %s with next hop(s) %s",
ipPrefix.to_string().c_str(), nextHops.to_string().c_str());
Expand All @@ -1635,7 +1636,7 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey
{
removeNextHopGroup(nextHops);
}
return false;
return handleSaiCreateStatus(SAI_API_ROUTE, status);
}

if (ipPrefix.isV4())
Expand Down Expand Up @@ -1665,7 +1666,7 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey
{
SWSS_LOG_ERROR("Failed to set route %s with packet action forward, %d",
ipPrefix.to_string().c_str(), status);
return false;
return handleSaiSetStatus(SAI_API_ROUTE, status);
}
}

Expand All @@ -1674,7 +1675,7 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey
{
SWSS_LOG_ERROR("Failed to set route %s with next hop(s) %s",
ipPrefix.to_string().c_str(), nextHops.to_string().c_str());
return false;
return handleSaiSetStatus(SAI_API_ROUTE, status);
}

/* Increase the ref_count for the next hop (group) entry */
Expand Down Expand Up @@ -1792,7 +1793,7 @@ bool RouteOrch::removeRoutePost(const RouteBulkContext& ctx)
{
SWSS_LOG_ERROR("Failed to set route %s packet action to drop, rv:%d",
ipPrefix.to_string().c_str(), status);
return false;
return handleSaiSetStatus(SAI_API_ROUTE, status);
}

SWSS_LOG_INFO("Set route %s packet action to drop", ipPrefix.to_string().c_str());
Expand All @@ -1802,7 +1803,7 @@ bool RouteOrch::removeRoutePost(const RouteBulkContext& ctx)
{
SWSS_LOG_ERROR("Failed to set route %s next hop ID to NULL, rv:%d",
ipPrefix.to_string().c_str(), status);
return false;
return handleSaiSetStatus(SAI_API_ROUTE, status);
}

SWSS_LOG_INFO("Set route %s next hop ID to NULL", ipPrefix.to_string().c_str());
Expand All @@ -1813,7 +1814,7 @@ bool RouteOrch::removeRoutePost(const RouteBulkContext& ctx)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to remove route prefix:%s\n", ipPrefix.to_string().c_str());
return false;
return handleSaiRemoveStatus(SAI_API_ROUTE, status);
}

if (ipPrefix.isV4())
Expand Down