diff --git a/orchagent/bulker.h b/orchagent/bulker.h new file mode 100644 index 0000000000..e3e464c62d --- /dev/null +++ b/orchagent/bulker.h @@ -0,0 +1,701 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "sai.h" +#include "logger.h" + +static inline bool operator==(const sai_ip_prefix_t& a, const sai_ip_prefix_t& b) +{ + if (a.addr_family != b.addr_family) return false; + + if (a.addr_family == SAI_IP_ADDR_FAMILY_IPV4) + { + return a.addr.ip4 == b.addr.ip4 + && a.mask.ip4 == b.mask.ip4 + ; + } + else if (a.addr_family == SAI_IP_ADDR_FAMILY_IPV6) + { + return memcmp(a.addr.ip6, b.addr.ip6, sizeof(a.addr.ip6)) == 0 + && memcmp(a.mask.ip6, b.mask.ip6, sizeof(a.mask.ip6)) == 0 + ; + } + else + { + throw std::invalid_argument("a has invalid addr_family"); + } +} + +static inline bool operator==(const sai_route_entry_t& a, const sai_route_entry_t& b) +{ + return a.switch_id == b.switch_id + && a.vr_id == b.vr_id + && a.destination == b.destination + ; +} + +static inline std::size_t hash_value(const sai_ip_prefix_t& a) +{ + size_t seed = 0; + boost::hash_combine(seed, a.addr_family); + if (a.addr_family == SAI_IP_ADDR_FAMILY_IPV4) + { + boost::hash_combine(seed, a.addr.ip4); + boost::hash_combine(seed, a.mask.ip4); + } + else if (a.addr_family == SAI_IP_ADDR_FAMILY_IPV6) + { + boost::hash_combine(seed, a.addr.ip6); + boost::hash_combine(seed, a.mask.ip6); + } + return seed; +} + +namespace std +{ + template <> + struct hash + { + size_t operator()(const sai_route_entry_t& a) const noexcept + { + size_t seed = 0; + boost::hash_combine(seed, a.switch_id); + boost::hash_combine(seed, a.vr_id); + boost::hash_combine(seed, a.destination); + return seed; + } + }; + + template <> + struct hash + { + size_t operator()(const sai_fdb_entry_t& a) const noexcept + { + size_t seed = 0; + boost::hash_combine(seed, a.switch_id); + boost::hash_combine(seed, a.mac_address); + boost::hash_combine(seed, a.bv_id); + return seed; + } + }; +} + +// SAI typedef which is not available in SAI 1.5 +// TODO: remove after available +typedef sai_status_t (*sai_bulk_create_fdb_entry_fn)( + _In_ uint32_t object_count, + _In_ const sai_fdb_entry_t *fdb_entry, + _In_ const uint32_t *attr_count, + _In_ const sai_attribute_t **attr_list, + _In_ sai_bulk_op_error_mode_t mode, + _Out_ sai_status_t *object_statuses); +typedef sai_status_t (*sai_bulk_remove_fdb_entry_fn)( + _In_ uint32_t object_count, + _In_ const sai_fdb_entry_t *fdb_entry, + _In_ sai_bulk_op_error_mode_t mode, + _Out_ sai_status_t *object_statuses); +typedef sai_status_t (*sai_bulk_set_fdb_entry_attribute_fn)( + _In_ uint32_t object_count, + _In_ const sai_fdb_entry_t *fdb_entry, + _In_ const sai_attribute_t *attr_list, + _In_ sai_bulk_op_error_mode_t mode, + _Out_ sai_status_t *object_statuses); + +template +struct SaiBulkerTraits { }; + +template<> +struct SaiBulkerTraits +{ + using entry_t = sai_route_entry_t; + using api_t = sai_route_api_t; + using create_entry_fn = sai_create_route_entry_fn; + using remove_entry_fn = sai_remove_route_entry_fn; + using set_entry_attribute_fn = sai_set_route_entry_attribute_fn; + using bulk_create_entry_fn = sai_bulk_create_route_entry_fn; + using bulk_remove_entry_fn = sai_bulk_remove_route_entry_fn; + using bulk_set_entry_attribute_fn = sai_bulk_set_route_entry_attribute_fn; +}; + +template<> +struct SaiBulkerTraits +{ + using entry_t = sai_fdb_entry_t; + using api_t = sai_fdb_api_t; + using create_entry_fn = sai_create_fdb_entry_fn; + using remove_entry_fn = sai_remove_fdb_entry_fn; + using set_entry_attribute_fn = sai_set_fdb_entry_attribute_fn; + using bulk_create_entry_fn = sai_bulk_create_fdb_entry_fn; + using bulk_remove_entry_fn = sai_bulk_remove_fdb_entry_fn; + using bulk_set_entry_attribute_fn = sai_bulk_set_fdb_entry_attribute_fn; +}; + +template<> +struct SaiBulkerTraits +{ + using entry_t = sai_object_id_t; + using api_t = sai_next_hop_group_api_t; + using create_entry_fn = sai_create_next_hop_group_member_fn; + using remove_entry_fn = sai_remove_next_hop_group_member_fn; + using set_entry_attribute_fn = sai_set_next_hop_group_member_attribute_fn; + using bulk_create_entry_fn = sai_bulk_object_create_fn; + using bulk_remove_entry_fn = sai_bulk_object_remove_fn; + // TODO: wait until available in SAI + //using bulk_set_entry_attribute_fn = sai_bulk_object_set_attribute_fn; +}; + +template +class EntityBulker +{ +public: + using Ts = SaiBulkerTraits; + using Te = typename Ts::entry_t; + + EntityBulker(typename Ts::api_t *api) + { + throw std::logic_error("Not implemented"); + } + + sai_status_t create_entry( + _Out_ sai_status_t *object_status, + _In_ const Te *entry, + _In_ uint32_t attr_count, + _In_ const sai_attribute_t *attr_list) + { + assert(object_status); + if (!object_status) throw std::invalid_argument("object_status is null"); + assert(entry); + if (!entry) throw std::invalid_argument("entry is null"); + assert(attr_list); + if (!attr_list) throw std::invalid_argument("attr_list is null"); + + auto rc = creating_entries.emplace(std::piecewise_construct, + std::forward_as_tuple(*entry), + std::forward_as_tuple()); + auto it = rc.first; + bool inserted = rc.second; + if (!inserted) + { + SWSS_LOG_INFO("EntityBulker.create_entry not inserted %zu\n", creating_entries.size()); + *object_status = SAI_STATUS_ITEM_ALREADY_EXISTS; + return *object_status; + } + + auto& attrs = it->second.first; + attrs.insert(attrs.end(), attr_list, attr_list + attr_count); + it->second.second = object_status; + SWSS_LOG_INFO("EntityBulker.create_entry %zu, %zu, %d, %d\n", creating_entries.size(), it->second.first.size(), (int)it->second.first[0].id, inserted); + *object_status = SAI_STATUS_NOT_EXECUTED; + return *object_status; + } + + sai_status_t remove_entry( + _Out_ sai_status_t *object_status, + _In_ const Te *entry) + { + assert(object_status); + if (!object_status) throw std::invalid_argument("object_status is null"); + assert(entry); + if (!entry) throw std::invalid_argument("entry is null"); + + auto found_setting = setting_entries.find(*entry); + if (found_setting != setting_entries.end()) + { + // Mark old one as done + auto& attrmap = found_setting->second; + for (auto& attr: attrmap) + { + *attr.second.second = SAI_STATUS_SUCCESS; + } + // Erase old one + setting_entries.erase(found_setting); + } + + auto found_creating = creating_entries.find(*entry); + if (found_creating != creating_entries.end()) + { + // Mark old ones as done + *found_creating->second.second = SAI_STATUS_SUCCESS; + // Erase old one + creating_entries.erase(found_creating); + // No need to keep in bulker, claim success immediately + *object_status = SAI_STATUS_SUCCESS; + SWSS_LOG_INFO("EntityBulker.remove_entry quickly removed %zu, creating_entries.size=%zu\n", removing_entries.size(), creating_entries.size()); + return *object_status; + } + auto rc = removing_entries.emplace(std::piecewise_construct, + std::forward_as_tuple(*entry), + std::forward_as_tuple(object_status)); + bool inserted = rc.second; + SWSS_LOG_INFO("EntityBulker.remove_entry %zu, %d\n", removing_entries.size(), inserted); + + *object_status = SAI_STATUS_NOT_EXECUTED; + return *object_status; + } + + void set_entry_attribute( + _Out_ sai_status_t *object_status, + _In_ const Te *entry, + _In_ const sai_attribute_t *attr) + { + assert(object_status); + if (!object_status) throw std::invalid_argument("object_status is null"); + assert(entry); + if (!entry) throw std::invalid_argument("entry is null"); + assert(attr); + if (!attr) throw std::invalid_argument("attr is null"); + + // Insert or find the key (entry) + auto& attrmap = setting_entries.emplace(std::piecewise_construct, + std::forward_as_tuple(*entry), + std::forward_as_tuple() + ).first->second; + + // Insert or find the key (attr) + auto rc = attrmap.emplace(std::piecewise_construct, + std::forward_as_tuple(attr->id), + std::forward_as_tuple()); + bool inserted = rc.second; + auto it = rc.first; + + // If inserted new key, assign the attr + // If found existing key, overwrite the old attr + it->second.first = *attr; + if (!inserted) + { + // If found existing key, mark old status as success + *it->second.second = SAI_STATUS_SUCCESS; + } + it->second.second = object_status; + *object_status = SAI_STATUS_NOT_EXECUTED; + } + + void flush() + { + // Removing + if (!removing_entries.empty()) + { + std::vector rs; + + for (auto& i: removing_entries) + { + auto const& entry = i.first; + sai_status_t *object_status = i.second; + if (*object_status == SAI_STATUS_NOT_EXECUTED) + { + rs.push_back(entry); + } + } + size_t count = rs.size(); + std::vector statuses(count); + (*remove_entries)((uint32_t)count, rs.data(), SAI_BULK_OP_ERROR_MODE_IGNORE_ERROR, statuses.data()); + SWSS_LOG_INFO("EntityBulker.flush removing_entries %zu\n", removing_entries.size()); + + for (size_t ir = 0; ir < count; ir++) + { + auto& entry = rs[ir]; + sai_status_t *object_status = removing_entries[entry]; + if (object_status) + { + *object_status = statuses[ir]; + } + } + removing_entries.clear(); + } + + // Creating + if (!creating_entries.empty()) + { + std::vector rs; + std::vector tss; + std::vector cs; + + for (auto const& i: creating_entries) + { + auto const& entry = i.first; + auto const& attrs = i.second.first; + sai_status_t *object_status = i.second.second; + if (*object_status == SAI_STATUS_NOT_EXECUTED) + { + rs.push_back(entry); + tss.push_back(attrs.data()); + cs.push_back((uint32_t)attrs.size()); + } + } + size_t count = rs.size(); + std::vector statuses(count); + (*create_entries)((uint32_t)count, rs.data(), cs.data(), tss.data() + , SAI_BULK_OP_ERROR_MODE_IGNORE_ERROR, statuses.data()); + SWSS_LOG_INFO("EntityBulker.flush creating_entries %zu\n", creating_entries.size()); + + for (size_t ir = 0; ir < count; ir++) + { + auto& entry = rs[ir]; + sai_status_t *object_status = creating_entries[entry].second; + if (object_status) + { + *object_status = statuses[ir]; + } + } + creating_entries.clear(); + } + + // Setting + if (!setting_entries.empty()) + { + std::vector rs; + std::vector ts; + + for (auto const& i: setting_entries) + { + auto const& entry = i.first; + auto const& attrmap = i.second; + for (auto const& ia: attrmap) + { + auto const& attr = ia.second.first; + sai_status_t *object_status = ia.second.second; + if (*object_status == SAI_STATUS_NOT_EXECUTED) + { + rs.push_back(entry); + ts.push_back(attr); + } + } + } + size_t count = rs.size(); + std::vector statuses(count); + (*set_entries_attribute)((uint32_t)count, rs.data(), ts.data() + , SAI_BULK_OP_ERROR_MODE_IGNORE_ERROR, statuses.data()); + SWSS_LOG_INFO("EntityBulker.flush setting_entries %zu, count %zu\n", setting_entries.size(), count); + + for (size_t ir = 0; ir < count; ir++) + { + auto& entry = rs[ir]; + auto& attr_id = ts[ir].id; + sai_status_t *object_status = setting_entries[entry][attr_id].second; + if (object_status) + { + SWSS_LOG_INFO("EntityBulker.flush setting_entries status[%zu]=%d(0x%8p)\n", ir, statuses[ir], object_status); + *object_status = statuses[ir]; + } + } + setting_entries.clear(); + } + } + + void clear() + { + removing_entries.clear(); + creating_entries.clear(); + setting_entries.clear(); + } + + size_t creating_entries_count() const + { + return creating_entries.size(); + } + + size_t setting_entries_count() const + { + return setting_entries.size(); + } + + size_t removing_entries_count() const + { + return removing_entries.size(); + } + + size_t creating_entries_count(const Te& entry) const + { + return creating_entries.count(entry); + } + +private: + std::unordered_map< // A map of + Te, // entry -> + std::pair< + std::vector, // (attributes, OUT object_status) + sai_status_t * + > + > creating_entries; + + std::unordered_map< // A map of + Te, // entry -> + std::unordered_map< // another map of + sai_attr_id_t, // attr_id -> + std::pair< + sai_attribute_t, // (attr_value, OUT object_status) + sai_status_t * + > + > + > setting_entries; + + std::unordered_map< // A map of + Te, // entry -> + sai_status_t * // OUT object_status + > removing_entries; + + typename Ts::bulk_create_entry_fn create_entries; + typename Ts::bulk_remove_entry_fn remove_entries; + typename Ts::bulk_set_entry_attribute_fn set_entries_attribute; +}; + +template <> +inline EntityBulker::EntityBulker(sai_route_api_t *api) +{ + create_entries = api->create_route_entries; + remove_entries = api->remove_route_entries; + set_entries_attribute = api->set_route_entries_attribute; +} + +template <> +inline EntityBulker::EntityBulker(sai_fdb_api_t *api) +{ + // TODO: implement after create_fdb_entries() is available in SAI + throw std::logic_error("Not implemented"); + /* + create_entries = api->create_fdb_entries; + remove_entries = api->remove_fdb_entries; + set_entries_attribute = api->set_fdb_entries_attribute; + */ +} + +template +class ObjectBulker +{ +public: + using Ts = SaiBulkerTraits; + + ObjectBulker(typename Ts::api_t* next_hop_group_api, sai_object_id_t switch_id) + { + throw std::logic_error("Not implemented"); + } + + sai_status_t create_entry( + _Out_ sai_object_id_t *object_id, + _In_ uint32_t attr_count, + _In_ const sai_attribute_t *attr_list) + { + assert(object_id); + if (!object_id) throw std::invalid_argument("object_id is null"); + assert(attr_list); + if (!attr_list) throw std::invalid_argument("attr_list is null"); + + creating_entries.emplace_back(std::piecewise_construct, std::forward_as_tuple(object_id), std::forward_as_tuple(attr_list, attr_list + attr_count)); + + auto& last_attrs = std::get<1>(creating_entries.back()); + SWSS_LOG_INFO("ObjectBulker.create_entry %zu, %zu, %u\n", creating_entries.size(), last_attrs.size(), last_attrs[0].id); + + *object_id = SAI_NULL_OBJECT_ID; // not created immediately, postponed until flush + return SAI_STATUS_NOT_EXECUTED; + } + + sai_status_t remove_entry( + _Out_ sai_status_t *object_status, + _In_ sai_object_id_t object_id) + { + assert(object_status); + if (!object_status) throw std::invalid_argument("object_status is null"); + assert(object_id != SAI_NULL_OBJECT_ID); + if (object_id == SAI_NULL_OBJECT_ID) throw std::invalid_argument("object_id is null"); + + auto found_setting = setting_entries.find(object_id); + if (found_setting != setting_entries.end()) + { + setting_entries.erase(found_setting); + } + + removing_entries.emplace(object_id, object_status); + *object_status = SAI_STATUS_NOT_EXECUTED; + return *object_status; + } + + // TODO: wait until available in SAI + /* + sai_status_t set_entry_attribute( + _In_ sai_object_id_t object_id, + _In_ const sai_attribute_t *attr) + { + auto found_setting = setting_entries.find(object_id); + if (found_setting != setting_entries.end()) + { + // For simplicity, just insert new attribute at the vector end, no merging + found_setting->second.emplace_back(*attr); + } + else + { + // Create a new key if not exists in the map + setting_entries.emplace(std::piecewise_construct, + std::forward_as_tuple(object_id), + std::forward_as_tuple(1, *attr)); + } + + return SAI_STATUS_SUCCESS; + } + */ + + void flush() + { + // Removing + if (!removing_entries.empty()) + { + std::vector rs; + for (auto const& i: removing_entries) + { + auto const& entry = i.first; + sai_status_t *object_status = i.second; + if (*object_status == SAI_STATUS_NOT_EXECUTED) + { + rs.push_back(entry); + } + } + size_t count = rs.size(); + std::vector statuses(count); + sai_status_t status = (*remove_entries)((uint32_t)count, rs.data(), SAI_BULK_OP_ERROR_MODE_STOP_ON_ERROR, statuses.data()); + SWSS_LOG_INFO("ObjectBulker.flush removing_entries %zu rc=%d statuses[0]=%d\n", removing_entries.size(), status, statuses[0]); + + for (size_t i = 0; i < count; i++) + { + auto const& entry = rs[i]; + sai_status_t object_status = statuses[i]; + *removing_entries[entry] = object_status; + } + removing_entries.clear(); + } + + // Creating + if (!creating_entries.empty()) + { + std::vector tss; + std::vector cs; + + for (auto const& i: creating_entries) + { + sai_object_id_t *pid = std::get<0>(i); + auto const& attrs = std::get<1>(i); + if (*pid == SAI_NULL_OBJECT_ID) + { + tss.push_back(attrs.data()); + cs.push_back((uint32_t)attrs.size()); + } + } + size_t count = creating_entries.size(); + std::vector object_ids(count); + std::vector statuses(count); + (*create_entries)(switch_id, (uint32_t)count, cs.data(), tss.data() + , SAI_BULK_OP_ERROR_MODE_STOP_ON_ERROR, object_ids.data(), statuses.data()); + SWSS_LOG_INFO("ObjectBulker.flush creating_entries %zu\n", creating_entries.size()); + + for (size_t i = 0; i < count; i++) + { + sai_object_id_t *pid = std::get<0>(creating_entries[i]); + *pid = (statuses[i] == SAI_STATUS_SUCCESS) ? object_ids[i] : SAI_NULL_OBJECT_ID; + } + + creating_entries.clear(); + } + + // Setting + // TODO: wait until available in SAI + /* + if (!setting_entries.empty()) + { + std::vector rs; + std::vector ts; + + for (auto const& i: setting_entries) + { + auto const& entry = i.first; + auto const& attrs = i.second; + for (auto const& attr: attrs) + { + rs.push_back(entry); + ts.push_back(attr); + } + } + size_t count = setting_entries.size(); + std::vector statuses(count); + (*set_entries_attribute)((uint32_t)count, rs.data(), ts.data() + , SAI_BULK_OP_ERROR_MODE_STOP_ON_ERROR, statuses.data()); + + SWSS_LOG_INFO("ObjectBulker.flush setting_entries %zu\n", setting_entries.size()); + + setting_entries.clear(); + } + */ + } + + void clear() + { + removing_entries.clear(); + creating_entries.clear(); + setting_entries.clear(); + } + + size_t creating_entries_count() const + { + return creating_entries.size(); + } + + size_t setting_entries_count() const + { + return setting_entries.size(); + } + + size_t removing_entries_count() const + { + return removing_entries.size(); + } + +private: + struct object_entry + { + sai_object_id_t *object_id; + std::vector attrs; + template + object_entry(sai_object_id_t *object_id, InputIterator first, InputIterator last) + : object_id(object_id) + , attrs(first, last) + { + } + }; + + sai_object_id_t switch_id; + + std::vector // - attrs + >> creating_entries; + + std::unordered_map< // A map of + sai_object_id_t, // object_id -> (OUT object_status, attributes) + std::pair< + sai_status_t *, + std::vector + > + > setting_entries; + + // A map of + // object_id -> object_status + std::unordered_map removing_entries; + + typename Ts::bulk_create_entry_fn create_entries; + typename Ts::bulk_remove_entry_fn remove_entries; + // TODO: wait until available in SAI + //typename Ts::bulk_set_entry_attribute_fn set_entries_attribute; +}; + +template <> +inline ObjectBulker::ObjectBulker(SaiBulkerTraits::api_t *api, sai_object_id_t switch_id) + : switch_id(switch_id) +{ + create_entries = api->create_next_hop_group_members; + remove_entries = api->remove_next_hop_group_members; + // TODO: wait until available in SAI + //set_entries_attribute = ; +} diff --git a/orchagent/nexthopgroupkey.h b/orchagent/nexthopgroupkey.h index d60aee8a32..aeeca8ba96 100644 --- a/orchagent/nexthopgroupkey.h +++ b/orchagent/nexthopgroupkey.h @@ -131,6 +131,11 @@ class NextHopGroupKey return nhs_str; } + void clear() + { + m_nexthops.clear(); + } + private: std::set m_nexthops; }; diff --git a/orchagent/routeorch.cpp b/orchagent/routeorch.cpp index 983dbd0314..bc9174be93 100644 --- a/orchagent/routeorch.cpp +++ b/orchagent/routeorch.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "routeorch.h" #include "logger.h" #include "swssnet.h" @@ -22,6 +23,8 @@ extern CrmOrch *gCrmOrch; const int routeorch_pri = 5; RouteOrch::RouteOrch(DBConnector *db, string tableName, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch) : + gRouteBulker(sai_route_api), + gNextHopGroupMemberBulker(sai_next_hop_group_api, gSwitchId), Orch(db, tableName, routeorch_pri), m_neighOrch(neighOrch), m_intfsOrch(intfsOrch), @@ -373,217 +376,321 @@ void RouteOrch::doTask(Consumer& consumer) auto it = consumer.m_toSync.begin(); while (it != consumer.m_toSync.end()) { - KeyOpFieldsValuesTuple t = it->second; - - string key = kfvKey(t); - string op = kfvOp(t); - - /* Get notification from application */ - /* resync application: - * When routeorch receives 'resync' message, it marks all current - * routes as dirty and waits for 'resync complete' message. For all - * newly received routes, if they match current dirty routes, it unmarks - * them dirty. After receiving 'resync complete' message, it creates all - * newly added routes and removes all dirty routes. - */ - if (key == "resync") + // Route bulk results will be stored in a map + std::map< + std::pair< + std::string, // Key + std::string // Op + >, + RouteBulkContext + > toBulk; + + // Add or remove routes with a route bulker + while (it != consumer.m_toSync.end()) { - if (op == "SET") - { - /* Mark all current routes as dirty (DEL) in consumer.m_toSync map */ - SWSS_LOG_NOTICE("Start resync routes\n"); - for (auto j : m_syncdRoutes) - { - string vrf; + KeyOpFieldsValuesTuple t = it->second; - if (j.first != gVirtualRouterId) - { - vrf = m_vrfOrch->getVRFname(j.first) + ":"; - } + string key = kfvKey(t); + string op = kfvOp(t); - for (auto i : j.second) - { - vector v; - key = vrf + i.first.to_string(); - auto x = KeyOpFieldsValuesTuple(key, DEL_COMMAND, v); - consumer.addToSync(x); - } - } - m_resync = true; - } - else + auto rc = toBulk.emplace(std::piecewise_construct, + std::forward_as_tuple(key, op), + std::forward_as_tuple()); + + bool inserted = rc.second; + auto& ctx = rc.first->second; + if (!inserted) { - SWSS_LOG_NOTICE("Complete resync routes\n"); - m_resync = false; + ctx.clear(); } - it = consumer.m_toSync.erase(it); - continue; - } - - if (m_resync) - { - it++; - continue; - } - - sai_object_id_t vrf_id; - IpPrefix ip_prefix; + /* Get notification from application */ + /* resync application: + * When routeorch receives 'resync' message, it marks all current + * routes as dirty and waits for 'resync complete' message. For all + * newly received routes, if they match current dirty routes, it unmarks + * them dirty. After receiving 'resync complete' message, it creates all + * newly added routes and removes all dirty routes. + */ + if (key == "resync") + { + if (op == "SET") + { + /* Mark all current routes as dirty (DEL) in consumer.m_toSync map */ + SWSS_LOG_NOTICE("Start resync routes\n"); + for (auto j : m_syncdRoutes) + { + string vrf; + + if (j.first != gVirtualRouterId) + { + vrf = m_vrfOrch->getVRFname(j.first) + ":"; + } + + for (auto i : j.second) + { + vector v; + key = vrf + i.first.to_string(); + auto x = KeyOpFieldsValuesTuple(key, DEL_COMMAND, v); + consumer.addToSync(x); + } + } + m_resync = true; + } + else + { + SWSS_LOG_NOTICE("Complete resync routes\n"); + m_resync = false; + } - if (!key.compare(0, strlen(VRF_PREFIX), VRF_PREFIX)) - { - size_t found = key.find(':'); - string vrf_name = key.substr(0, found); + it = consumer.m_toSync.erase(it); + continue; + } - if (!m_vrfOrch->isVRFexists(vrf_name)) + if (m_resync) { it++; continue; } - vrf_id = m_vrfOrch->getVRFid(vrf_name); - ip_prefix = IpPrefix(key.substr(found+1)); - } - else - { - vrf_id = gVirtualRouterId; - ip_prefix = IpPrefix(key); - } - if (op == SET_COMMAND) - { - string ips; - string aliases; - bool excp_intfs_flag = false; + sai_object_id_t& vrf_id = ctx.vrf_id; + IpPrefix& ip_prefix = ctx.ip_prefix; - for (auto i : kfvFieldsValues(t)) + if (!key.compare(0, strlen(VRF_PREFIX), VRF_PREFIX)) { - if (fvField(i) == "nexthop") - ips = fvValue(i); - - if (fvField(i) == "ifname") - aliases = fvValue(i); - } - vector ipv = tokenize(ips, ','); - vector alsv = tokenize(aliases, ','); - - /* - * For backward compatibility, adjust ip string from old format to - * new format. Meanwhile it can deal with some abnormal cases. - */ + size_t found = key.find(':'); + string vrf_name = key.substr(0, found); - /* Resize the ip vector to match ifname vector - * as tokenize(",", ',') will miss the last empty segment. */ - if (alsv.size() == 0) - { - SWSS_LOG_WARN("Skip the route %s, for it has an empty ifname field.", key.c_str()); - it = consumer.m_toSync.erase(it); - continue; + if (!m_vrfOrch->isVRFexists(vrf_name)) + { + it++; + continue; + } + vrf_id = m_vrfOrch->getVRFid(vrf_name); + ip_prefix = IpPrefix(key.substr(found+1)); } - else if (alsv.size() != ipv.size()) + else { - SWSS_LOG_NOTICE("Route %s: resize ipv to match alsv, %zd -> %zd.", key.c_str(), ipv.size(), alsv.size()); - ipv.resize(alsv.size()); + vrf_id = gVirtualRouterId; + ip_prefix = IpPrefix(key); } - /* Set the empty ip(s) to zero - * as IpAddress("") will construst a incorrect ip. */ - for (auto &ip : ipv) + if (op == SET_COMMAND) { - if (ip.empty()) + string ips; + string aliases; + bool& excp_intfs_flag = ctx.excp_intfs_flag; + + for (auto i : kfvFieldsValues(t)) { - SWSS_LOG_NOTICE("Route %s: set the empty nexthop ip to zero.", key.c_str()); - ip = ip_prefix.isV4() ? "0.0.0.0" : "::"; + if (fvField(i) == "nexthop") + ips = fvValue(i); + + if (fvField(i) == "ifname") + aliases = fvValue(i); + } + vector& ipv = ctx.ipv; + ipv = tokenize(ips, ','); + vector alsv = tokenize(aliases, ','); + + /* + * For backward compatibility, adjust ip string from old format to + * new format. Meanwhile it can deal with some abnormal cases. + */ + + /* Resize the ip vector to match ifname vector + * as tokenize(",", ',') will miss the last empty segment. */ + if (alsv.size() == 0) + { + SWSS_LOG_WARN("Skip the route %s, for it has an empty ifname field.", key.c_str()); + it = consumer.m_toSync.erase(it); + continue; + } + else if (alsv.size() != ipv.size()) + { + SWSS_LOG_NOTICE("Route %s: resize ipv to match alsv, %zd -> %zd.", key.c_str(), ipv.size(), alsv.size()); + ipv.resize(alsv.size()); } - } - for (auto alias : alsv) - { - if (alias == "eth0" || alias == "lo" || alias == "docker0") + /* Set the empty ip(s) to zero + * as IpAddress("") will construst a incorrect ip. */ + for (auto &ip : ipv) + { + if (ip.empty()) + { + SWSS_LOG_NOTICE("Route %s: set the empty nexthop ip to zero.", key.c_str()); + ip = ip_prefix.isV4() ? "0.0.0.0" : "::"; + } + } + + for (auto alias : alsv) + { + if (alias == "eth0" || alias == "lo" || alias == "docker0") + { + excp_intfs_flag = true; + break; + } + } + + // TODO: cannot trust m_portsOrch->getPortIdByAlias because sometimes alias is empty + if (excp_intfs_flag) + { + /* If any existing routes are updated to point to the + * above interfaces, remove them from the ASIC. */ + if (removeRoute(ctx)) + it = consumer.m_toSync.erase(it); + else + it++; + continue; + } + + string nhg_str = ipv[0] + NH_DELIMITER + alsv[0]; + for (uint32_t i = 1; i < ipv.size(); i++) + { + nhg_str += NHG_DELIMITER + ipv[i] + NH_DELIMITER + alsv[i]; + } + + NextHopGroupKey& nhg = ctx.nhg; + nhg = NextHopGroupKey(nhg_str); + + if (ipv.size() == 1 && IpAddress(ipv[0]).isZero()) + { + /* blackhole to be done */ + if (alsv[0] == "unknown") + { + /* add addBlackholeRoute or addRoute support empty nhg */ + it = consumer.m_toSync.erase(it); + } + /* directly connected route to VRF interface which come from kernel */ + else if (!alsv[0].compare(0, strlen(VRF_PREFIX), VRF_PREFIX)) + { + it = consumer.m_toSync.erase(it); + } + /* skip prefix which is linklocal or multicast */ + else if (ip_prefix.getIp().getAddrScope() != IpAddress::GLOBAL_SCOPE) + { + it = consumer.m_toSync.erase(it); + } + /* fullmask subnet route is same as ip2me route */ + else if (ip_prefix.isFullMask() && m_intfsOrch->isPrefixSubnet(ip_prefix, alsv[0])) + { + it = consumer.m_toSync.erase(it); + } + /* subnet route, vrf leaked route, etc */ + else + { + if (addRoute(ctx, nhg)) + it = consumer.m_toSync.erase(it); + else + it++; + } + } + else if (m_syncdRoutes.find(vrf_id) == m_syncdRoutes.end() || + m_syncdRoutes.at(vrf_id).find(ip_prefix) == m_syncdRoutes.at(vrf_id).end() || + m_syncdRoutes.at(vrf_id).at(ip_prefix) != nhg) + { + if (addRoute(ctx, nhg)) + it = consumer.m_toSync.erase(it); + else + it++; + } + else + /* Duplicate entry */ + it = consumer.m_toSync.erase(it); + + // If already exhaust the nexthop groups, and there are pending removing routes in bulker, + // flush the bulker and possibly collect some released nexthop groups + if (m_nextHopGroupCount >= m_maxNextHopGroupCount && gRouteBulker.removing_entries_count() > 0) { - excp_intfs_flag = true; break; } } - - // TODO: cannot trust m_portsOrch->getPortIdByAlias because sometimes alias is empty - if (excp_intfs_flag) + else if (op == DEL_COMMAND) { - /* If any existing routes are updated to point to the - * above interfaces, remove them from the ASIC. */ - if (removeRoute(vrf_id, ip_prefix)) + if (removeRoute(ctx)) it = consumer.m_toSync.erase(it); else it++; + } + else + { + SWSS_LOG_ERROR("Unknown operation type %s\n", op.c_str()); + it = consumer.m_toSync.erase(it); + } + } + + // Flush the route bulker, so routes will be written to syncd and ASIC + gRouteBulker.flush(); + + // Go through the bulker results + auto it_prev = consumer.m_toSync.begin(); + while (it_prev != it) + { + KeyOpFieldsValuesTuple t = it_prev->second; + + string key = kfvKey(t); + string op = kfvOp(t); + auto found = toBulk.find(make_pair(key, op)); + if (found == toBulk.end()) + { + it_prev++; continue; } - string nhg_str = ipv[0] + NH_DELIMITER + alsv[0]; - for (uint32_t i = 1; i < ipv.size(); i++) + const auto& ctx = found->second; + const auto& object_statuses = ctx.object_statuses; + if (object_statuses.empty()) { - nhg_str += NHG_DELIMITER + ipv[i] + NH_DELIMITER + alsv[i]; + it_prev++; + continue; } - NextHopGroupKey nhg(nhg_str); + const sai_object_id_t& vrf_id = ctx.vrf_id; + const IpPrefix& ip_prefix = ctx.ip_prefix; - if (ipv.size() == 1 && IpAddress(ipv[0]).isZero()) + if (op == SET_COMMAND) { - /* blackhole to be done */ - if (alsv[0] == "unknown") - { - /* add addBlackholeRoute or addRoute support empty nhg */ - it = consumer.m_toSync.erase(it); - } - /* directly connected route to VRF interface which come from kernel */ - else if (!alsv[0].compare(0, strlen(VRF_PREFIX), VRF_PREFIX)) - { - it = consumer.m_toSync.erase(it); - } - /* skip prefix which is linklocal or multicast */ - else if (ip_prefix.getIp().getAddrScope() != IpAddress::GLOBAL_SCOPE) + const bool& excp_intfs_flag = ctx.excp_intfs_flag; + const vector& ipv = ctx.ipv; + + if (excp_intfs_flag) { - it = consumer.m_toSync.erase(it); + /* If any existing routes are updated to point to the + * above interfaces, remove them from the ASIC. */ + if (removeRoutePost(ctx)) + it_prev = consumer.m_toSync.erase(it_prev); + else + it_prev++; + continue; } - /* fullmask subnet route is same as ip2me route */ - else if (ip_prefix.isFullMask() && m_intfsOrch->isPrefixSubnet(ip_prefix, alsv[0])) + + const NextHopGroupKey& nhg = ctx.nhg; + + if (ipv.size() == 1 && IpAddress(ipv[0]).isZero()) { - it = consumer.m_toSync.erase(it); + if (addRoutePost(ctx, nhg)) + it_prev = consumer.m_toSync.erase(it_prev); + else + it_prev++; } - /* subnet route, vrf leaked route, etc */ - else + else if (m_syncdRoutes.find(vrf_id) == m_syncdRoutes.end() || + m_syncdRoutes.at(vrf_id).find(ip_prefix) == m_syncdRoutes.at(vrf_id).end() || + m_syncdRoutes.at(vrf_id).at(ip_prefix) != nhg) { - if (addRoute(vrf_id, ip_prefix, nhg)) - it = consumer.m_toSync.erase(it); + if (addRoutePost(ctx, nhg)) + it_prev = consumer.m_toSync.erase(it_prev); else - it++; + it_prev++; } - continue; } - - if (m_syncdRoutes.find(vrf_id) == m_syncdRoutes.end() || - m_syncdRoutes.at(vrf_id).find(ip_prefix) == m_syncdRoutes.at(vrf_id).end() || - m_syncdRoutes.at(vrf_id).at(ip_prefix) != nhg) + else if (op == DEL_COMMAND) { - if (addRoute(vrf_id, ip_prefix, nhg)) - it = consumer.m_toSync.erase(it); + /* Cannot locate the route or remove succeed */ + if (removeRoutePost(ctx)) + it_prev = consumer.m_toSync.erase(it_prev); else - it++; + it_prev++; } - else - /* Duplicate entry */ - it = consumer.m_toSync.erase(it); - } - else if (op == DEL_COMMAND) - { - /* Cannot locate the route or remove succeed */ - if (removeRoute(vrf_id, ip_prefix)) - it = consumer.m_toSync.erase(it); - else - it++; - } - else - { - SWSS_LOG_ERROR("Unknown operation type %s\n", op.c_str()); - it = consumer.m_toSync.erase(it); } } } @@ -748,6 +855,12 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops) return false; } + // skip next hop group member create for neighbor from down port + if (m_neighOrch->isNextHopFlagSet(it, NHFLAGS_IFDOWN)) + { + continue; + } + sai_object_id_t next_hop_id = m_neighOrch->getNextHopId(it); next_hop_ids.push_back(next_hop_id); nhopgroup_members_set[next_hop_id] = it; @@ -781,12 +894,11 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops) NextHopGroupEntry next_hop_group_entry; next_hop_group_entry.next_hop_group_id = next_hop_group_id; - for (auto nhid: next_hop_ids) + size_t npid_count = next_hop_ids.size(); + vector nhgm_ids(npid_count); + for (size_t i = 0; i < npid_count; i++) { - // skip next hop group member create for neighbor from down port - if (m_neighOrch->isNextHopFlagSet(nhopgroup_members_set[nhid], NHFLAGS_IFDOWN)) { - continue; - } + auto nhid = next_hop_ids[i]; // Create a next hop group member vector nhgm_attrs; @@ -800,17 +912,21 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops) nhgm_attr.value.oid = nhid; nhgm_attrs.push_back(nhgm_attr); - sai_object_id_t next_hop_group_member_id; - status = sai_next_hop_group_api->create_next_hop_group_member(&next_hop_group_member_id, - gSwitchId, - (uint32_t)nhgm_attrs.size(), - nhgm_attrs.data()); + gNextHopGroupMemberBulker.create_entry(&nhgm_ids[i], + (uint32_t)nhgm_attrs.size(), + nhgm_attrs.data()); + } - if (status != SAI_STATUS_SUCCESS) + gNextHopGroupMemberBulker.flush(); + for (size_t i = 0; i < npid_count; i++) + { + auto nhid = next_hop_ids[i]; + auto nhgm_id = nhgm_ids[i]; + if (nhgm_id == SAI_NULL_OBJECT_ID) { // TODO: do we need to clean up? SWSS_LOG_ERROR("Failed to create next hop group %" PRIx64 " member %" PRIx64 ": %d\n", - next_hop_group_id, next_hop_group_member_id, status); + next_hop_group_id, nhgm_ids[i], status); return false; } @@ -818,7 +934,7 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops) // Save the membership into next hop structure next_hop_group_entry.nhopgroup_members[nhopgroup_members_set.find(nhid)->second] = - next_hop_group_member_id; + nhgm_id; } /* Increment the ref_count for the next hops used by the next hop group. */ @@ -854,27 +970,39 @@ bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops) next_hop_group_id = next_hop_group_entry->second.next_hop_group_id; SWSS_LOG_NOTICE("Delete next hop group %s", nexthops.to_string().c_str()); - for (auto nhop = next_hop_group_entry->second.nhopgroup_members.begin(); - nhop != next_hop_group_entry->second.nhopgroup_members.end();) + vector next_hop_ids; + auto& nhgm = next_hop_group_entry->second.nhopgroup_members; + for (auto nhop = nhgm.begin(); nhop != nhgm.end();) { - if (m_neighOrch->isNextHopFlagSet(nhop->first, NHFLAGS_IFDOWN)) { SWSS_LOG_WARN("NHFLAGS_IFDOWN set for next hop group member %s with next_hop_id %" PRIx64, nhop->first.to_string().c_str(), nhop->second); - nhop = next_hop_group_entry->second.nhopgroup_members.erase(nhop); + nhop = nhgm.erase(nhop); continue; } - status = sai_next_hop_group_api->remove_next_hop_group_member(nhop->second); - if (status != SAI_STATUS_SUCCESS) + + next_hop_ids.push_back(nhop->second); + nhop = nhgm.erase(nhop); + } + + size_t nhid_count = next_hop_ids.size(); + vector statuses(nhid_count); + for (size_t i = 0; i < nhid_count; i++) + { + gNextHopGroupMemberBulker.remove_entry(&statuses[i], next_hop_ids[i]); + } + gNextHopGroupMemberBulker.flush(); + for (size_t i = 0; i < nhid_count; i++) + { + if (statuses[i] != SAI_STATUS_SUCCESS) { - SWSS_LOG_ERROR("Failed to remove next hop group member %" PRIx64 ", rv:%d", - nhop->second, status); + SWSS_LOG_ERROR("Failed to remove next hop group member[%zu] %" PRIx64 ", rv:%d", + i, next_hop_ids[i], statuses[i]); return false; } gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER); - nhop = next_hop_group_entry->second.nhopgroup_members.erase(nhop); } status = sai_next_hop_group_api->remove_next_hop_group(next_hop_group_id); @@ -897,10 +1025,12 @@ bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops) return true; } -void RouteOrch::addTempRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops) +void RouteOrch::addTempRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) { SWSS_LOG_ENTER(); + IpPrefix& ipPrefix = ctx.ip_prefix; + auto next_hop_set = nextHops.getNextHops(); /* Remove next hops that are not in m_syncdNextHops */ @@ -926,13 +1056,18 @@ void RouteOrch::addTempRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, c /* Set the route's temporary next hop to be the randomly picked one */ NextHopGroupKey tmp_next_hop((*it).to_string()); - addRoute(vrf_id, ipPrefix, tmp_next_hop); + ctx.tmp_next_hop = tmp_next_hop; + + addRoute(ctx, tmp_next_hop); } -bool RouteOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops) +bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) { SWSS_LOG_ENTER(); + sai_object_id_t& vrf_id = ctx.vrf_id; + IpPrefix& ipPrefix = ctx.ip_prefix; + /* next_hop_id indicates the next hop id or next hop group id of this route */ sai_object_id_t next_hop_id; @@ -998,7 +1133,7 @@ bool RouteOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const /* Add a temporary route when a next hop group cannot be added, * and there is no temporary route right now or the current temporary * route is not pointing to a member of the next hop group to sync. */ - addTempRoute(vrf_id, ipPrefix, nextHops); + addTempRoute(ctx, nextHops); /* Return false since the original route is not successfully added */ return false; } @@ -1014,6 +1149,7 @@ bool RouteOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const copy(route_entry.destination, ipPrefix); sai_attribute_t route_attr; + auto& object_statuses = ctx.object_statuses; /* If the prefix is not in m_syncdRoutes, then we need to create the route * for this prefix with the new next hop (group) id. If the prefix is already @@ -1027,8 +1163,97 @@ bool RouteOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const route_attr.value.oid = next_hop_id; /* Default SAI_ROUTE_ATTR_PACKET_ACTION is SAI_PACKET_ACTION_FORWARD */ - sai_status_t status = sai_route_api->create_route_entry(&route_entry, 1, &route_attr); - if (status != SAI_STATUS_SUCCESS) + object_statuses.emplace_back(); + sai_status_t status = gRouteBulker.create_entry(&object_statuses.back(), &route_entry, 1, &route_attr); + if (status == SAI_STATUS_ITEM_ALREADY_EXISTS) + { + SWSS_LOG_ERROR("Failed to create route %s with next hop(s) %s: already exists in bulker", + ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); + return false; + } + } + else + { + /* Set the packet action to forward when there was no next hop (dropped) */ + if (it_route->second.getSize() == 0) + { + route_attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION; + route_attr.value.s32 = SAI_PACKET_ACTION_FORWARD; + + object_statuses.emplace_back(); + gRouteBulker.set_entry_attribute(&object_statuses.back(), &route_entry, &route_attr); + } + + route_attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID; + route_attr.value.oid = next_hop_id; + + /* Set the next hop ID to a new value */ + object_statuses.emplace_back(); + gRouteBulker.set_entry_attribute(&object_statuses.back(), &route_entry, &route_attr); + } + return false; +} + +bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey &nextHops) +{ + SWSS_LOG_ENTER(); + + const sai_object_id_t& vrf_id = ctx.vrf_id; + const IpPrefix& ipPrefix = ctx.ip_prefix; + + const auto& object_statuses = ctx.object_statuses; + + if (object_statuses.empty()) + { + // Something went wrong before router bulker, will retry + return false; + } + + /* next_hop_id indicates the next hop id or next hop group id of this route */ + sai_object_id_t next_hop_id; + + /* The route is pointing to a next hop */ + if (nextHops.getSize() == 1) + { + NextHopKey nexthop(nextHops.to_string()); + if (nexthop.ip_address.isZero()) + { + next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); + /* rif is not created yet */ + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_INFO("Failed to get next hop %s for %s", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + return false; + } + } + else + { + if (!m_neighOrch->hasNextHop(nexthop)) + { + SWSS_LOG_INFO("Failed to get next hop %s for %s", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + return false; + } + } + } + /* The route is pointing to a next hop group */ + else + { + if (!hasNextHopGroup(nextHops)) + { + // Previous added an temporary route + auto& tmp_next_hop = ctx.tmp_next_hop; + addRoutePost(ctx, tmp_next_hop); + return false; + } + } + + auto it_status = object_statuses.begin(); + auto it_route = m_syncdRoutes.at(vrf_id).find(ipPrefix); + if (it_route == m_syncdRoutes.at(vrf_id).end()) + { + if (*it_status++ != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to create route %s with next hop(s) %s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); @@ -1040,7 +1265,7 @@ bool RouteOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const return false; } - if (route_entry.destination.addr_family == SAI_IP_ADDR_FAMILY_IPV4) + if (ipPrefix.isV4()) { gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV4_ROUTE); } @@ -1051,7 +1276,7 @@ bool RouteOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const /* Increase the ref_count for the next hop (group) entry */ increaseNextHopRefCount(nextHops); - SWSS_LOG_INFO("Create route %s with next hop(s) %s", + SWSS_LOG_INFO("Post create route %s with next hop(s) %s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); } else @@ -1061,10 +1286,7 @@ bool RouteOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const /* Set the packet action to forward when there was no next hop (dropped) */ if (it_route->second.getSize() == 0) { - route_attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION; - route_attr.value.s32 = SAI_PACKET_ACTION_FORWARD; - - status = sai_route_api->set_route_entry_attribute(&route_entry, &route_attr); + status = *it_status++; if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to set route %s with packet action forward, %d", @@ -1073,11 +1295,7 @@ bool RouteOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const } } - route_attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID; - route_attr.value.oid = next_hop_id; - - /* Set the next hop ID to a new value */ - status = sai_route_api->set_route_entry_attribute(&route_entry, &route_attr); + status = *it_status++; if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to set route %s with next hop(s) %s", @@ -1094,7 +1312,7 @@ bool RouteOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const { removeNextHopGroup(it_route->second); } - SWSS_LOG_INFO("Set route %s with next hop(s) %s", + SWSS_LOG_INFO("Post set route %s with next hop(s) %s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); } @@ -1104,10 +1322,13 @@ bool RouteOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const return true; } -bool RouteOrch::removeRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix) +bool RouteOrch::removeRoute(RouteBulkContext& ctx) { SWSS_LOG_ENTER(); + sai_object_id_t& vrf_id = ctx.vrf_id; + IpPrefix& ipPrefix = ctx.ip_prefix; + auto it_route_table = m_syncdRoutes.find(vrf_id); if (it_route_table == m_syncdRoutes.end()) { @@ -1115,18 +1336,21 @@ bool RouteOrch::removeRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix) return true; } + sai_route_entry_t route_entry; + route_entry.vr_id = vrf_id; + route_entry.switch_id = gSwitchId; + copy(route_entry.destination, ipPrefix); + auto it_route = it_route_table->second.find(ipPrefix); - if (it_route == it_route_table->second.end()) + size_t creating = gRouteBulker.creating_entries_count(route_entry); + if (it_route == it_route_table->second.end() && creating == 0) { SWSS_LOG_INFO("Failed to find route entry, vrf_id 0x%" PRIx64 ", prefix %s\n", vrf_id, ipPrefix.to_string().c_str()); return true; } - sai_route_entry_t route_entry; - route_entry.vr_id = vrf_id; - route_entry.switch_id = gSwitchId; - copy(route_entry.destination, ipPrefix); + auto& object_statuses = ctx.object_statuses; // set to blackhole for default route if (ipPrefix.isDefaultRoute()) @@ -1135,7 +1359,47 @@ bool RouteOrch::removeRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix) attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION; attr.value.s32 = SAI_PACKET_ACTION_DROP; - sai_status_t status = sai_route_api->set_route_entry_attribute(&route_entry, &attr); + object_statuses.emplace_back(); + gRouteBulker.set_entry_attribute(&object_statuses.back(), &route_entry, &attr); + + attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID; + attr.value.oid = SAI_NULL_OBJECT_ID; + + object_statuses.emplace_back(); + gRouteBulker.set_entry_attribute(&object_statuses.back(), &route_entry, &attr); + } + else + { + object_statuses.emplace_back(); + gRouteBulker.remove_entry(&object_statuses.back(), &route_entry); + } + + return false; +} + +bool RouteOrch::removeRoutePost(const RouteBulkContext& ctx) +{ + SWSS_LOG_ENTER(); + + const sai_object_id_t& vrf_id = ctx.vrf_id; + const IpPrefix& ipPrefix = ctx.ip_prefix; + + auto& object_statuses = ctx.object_statuses; + + if (object_statuses.empty()) + { + // Something went wrong before router bulker, will retry + return false; + } + + auto it_route_table = m_syncdRoutes.find(vrf_id); + auto it_route = it_route_table->second.find(ipPrefix); + auto it_status = object_statuses.begin(); + + // set to blackhole for default route + if (ipPrefix.isDefaultRoute()) + { + sai_status_t status = *it_status++; if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to set route %s packet action to drop, rv:%d", @@ -1145,10 +1409,7 @@ bool RouteOrch::removeRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix) SWSS_LOG_INFO("Set route %s packet action to drop", ipPrefix.to_string().c_str()); - attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID; - attr.value.oid = SAI_NULL_OBJECT_ID; - - status = sai_route_api->set_route_entry_attribute(&route_entry, &attr); + status = *it_status++; if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to set route %s next hop ID to NULL, rv:%d", @@ -1160,14 +1421,14 @@ bool RouteOrch::removeRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix) } else { - sai_status_t status = sai_route_api->remove_route_entry(&route_entry); + sai_status_t status = *it_status++; if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to remove route prefix:%s\n", ipPrefix.to_string().c_str()); return false; } - if (route_entry.destination.addr_family == SAI_IP_ADDR_FAMILY_IPV4) + if (ipPrefix.isV4()) { gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_IPV4_ROUTE); } @@ -1175,7 +1436,6 @@ bool RouteOrch::removeRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix) { gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_IPV6_ROUTE); } - } /* diff --git a/orchagent/routeorch.h b/orchagent/routeorch.h index 84550d7589..8cd71aee27 100644 --- a/orchagent/routeorch.h +++ b/orchagent/routeorch.h @@ -10,6 +10,7 @@ #include "ipaddresses.h" #include "ipprefix.h" #include "nexthopgroupkey.h" +#include "bulker.h" #include @@ -54,6 +55,36 @@ struct NextHopObserverEntry list observers; }; +struct RouteBulkContext +{ + std::deque object_statuses; // Bulk statuses + NextHopGroupKey tmp_next_hop; // Temporary next hop + NextHopGroupKey nhg; + sai_object_id_t vrf_id; + IpPrefix ip_prefix; + bool excp_intfs_flag; + std::vector ipv; + + RouteBulkContext() + : excp_intfs_flag(false) + { + } + + // Disable any copy constructors + RouteBulkContext(const RouteBulkContext&) = delete; + RouteBulkContext(RouteBulkContext&&) = delete; + + void clear() + { + object_statuses.clear(); + tmp_next_hop.clear(); + nhg.clear(); + ipv.clear(); + excp_intfs_flag = false; + vrf_id = SAI_NULL_OBJECT_ID; + } +}; + class RouteOrch : public Orch, public Subject { public: @@ -90,9 +121,14 @@ class RouteOrch : public Orch, public Subject NextHopObserverTable m_nextHopObservers; - void addTempRoute(sai_object_id_t, const IpPrefix&, const NextHopGroupKey&); - bool addRoute(sai_object_id_t, const IpPrefix&, const NextHopGroupKey&); - bool removeRoute(sai_object_id_t, const IpPrefix&); + EntityBulker gRouteBulker; + ObjectBulker gNextHopGroupMemberBulker; + + void addTempRoute(RouteBulkContext& ctx, const NextHopGroupKey&); + bool addRoute(RouteBulkContext& ctx, const NextHopGroupKey&); + bool addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey &nextHops); + bool removeRoute(RouteBulkContext& ctx); + bool removeRoutePost(const RouteBulkContext& ctx); std::string getLinkLocalEui64Addr(void); void addLinkLocalRouteToMe(sai_object_id_t vrf_id, IpPrefix linklocal_prefix); diff --git a/tests/mock_tests/aclorch_ut.cpp b/tests/mock_tests/aclorch_ut.cpp index dc784c75ae..b7c057fce0 100644 --- a/tests/mock_tests/aclorch_ut.cpp +++ b/tests/mock_tests/aclorch_ut.cpp @@ -18,6 +18,7 @@ extern sai_port_api_t *sai_port_api; extern sai_vlan_api_t *sai_vlan_api; extern sai_bridge_api_t *sai_bridge_api; extern sai_route_api_t *sai_route_api; +extern sai_next_hop_group_api_t* sai_next_hop_group_api; namespace aclorch_test { @@ -258,6 +259,7 @@ namespace aclorch_test sai_api_query(SAI_API_VLAN, (void **)&sai_vlan_api); sai_api_query(SAI_API_ROUTE, (void **)&sai_route_api); sai_api_query(SAI_API_ACL, (void **)&sai_acl_api); + sai_api_query(SAI_API_NEXT_HOP_GROUP, (void **)&sai_next_hop_group_api); sai_attribute_t attr; diff --git a/tests/test_nhg.py b/tests/test_nhg.py index ac26b5107f..4a12590d9a 100644 --- a/tests/test_nhg.py +++ b/tests/test_nhg.py @@ -3,6 +3,7 @@ import time import json import pytest +import ipaddress from swsscommon import swsscommon @@ -26,13 +27,13 @@ def test_route_nhg(self, dvs, testlog): dvs.runcmd("arp -s 10.0.0.3 00:00:00:00:00:02") dvs.runcmd("arp -s 10.0.0.5 00:00:00:00:00:03") - dvs.servers[0].runcmd("ip link set down dev eth0") == 0 - dvs.servers[1].runcmd("ip link set down dev eth0") == 0 - dvs.servers[2].runcmd("ip link set down dev eth0") == 0 + assert dvs.servers[0].runcmd("ip link set down dev eth0") == 0 + assert dvs.servers[1].runcmd("ip link set down dev eth0") == 0 + assert dvs.servers[2].runcmd("ip link set down dev eth0") == 0 - dvs.servers[0].runcmd("ip link set up dev eth0") == 0 - dvs.servers[1].runcmd("ip link set up dev eth0") == 0 - dvs.servers[2].runcmd("ip link set up dev eth0") == 0 + assert dvs.servers[0].runcmd("ip link set up dev eth0") == 0 + assert dvs.servers[1].runcmd("ip link set up dev eth0") == 0 + assert dvs.servers[2].runcmd("ip link set up dev eth0") == 0 db = swsscommon.DBConnector(0, dvs.redis_sock, 0) ps = swsscommon.ProducerStateTable(db, "ROUTE_TABLE") @@ -138,3 +139,179 @@ def test_route_nhg(self, dvs, testlog): for v in fvs: if v[0] == "SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID": assert v[1] == nhgid + + def test_route_nhg_exhaust(self, dvs, testlog): + """ + Test the situation of exhausting ECMP group, assume SAI_SWITCH_ATTR_NUMBER_OF_ECMP_GROUPS is 512 + + In order to achieve that, we will config + 1. 9 ports + 2. 512 routes with different nexthop group + + See Also + -------- + SwitchStateBase::set_number_of_ecmp_groups() + https://github.com/Azure/sonic-sairedis/blob/master/vslib/src/SwitchStateBase.cpp + + """ + + # TODO: check ECMP 512 + + def port_name(i): + return "Ethernet" + str(i * 4) + + def port_ip(i): + return "10.0.0." + str(i * 2) + + def peer_ip(i): + return "10.0.0." + str(i * 2 + 1) + + def port_ipprefix(i): + return port_ip(i) + "/31" + + def port_mac(i): + return "00:00:00:00:00:0" + str(i) + + def gen_ipprefix(r): + """ Construct route like 2.X.X.0/24 """ + ip = ipaddress.IPv4Address(IP_INTEGER_BASE + r * 256) + ip = str(ip) + ipprefix = ip + "/24" + return ipprefix + + def gen_nhg_fvs(binary): + nexthop = [] + ifname = [] + for i in range(MAX_PORT_COUNT): + if binary[i] == '1': + nexthop.append(peer_ip(i)) + ifname.append(port_name(i)) + + nexthop = ','.join(nexthop) + ifname = ','.join(ifname) + fvs = swsscommon.FieldValuePairs([("nexthop", nexthop), ("ifname", ifname)]) + return fvs + + def asic_route_exists(keys, ipprefix): + for k in keys: + rt_key = json.loads(k) + + if rt_key['dest'] == ipprefix: + return k + else: + return None + + def asic_route_nhg_fvs(k): + fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY", k) + if not fvs: + return None + + print fvs + nhgid = fvs.get("SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID") + if nhgid is None: + return None + + fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", nhgid) + return fvs + + MAX_ECMP_COUNT = 512 + MAX_PORT_COUNT = 10 + IP_INTEGER_BASE = int(ipaddress.IPv4Address(unicode("2.2.2.0"))) + + config_db = dvs.get_config_db() + fvs = {"NULL": "NULL"} + + for i in range(MAX_PORT_COUNT): + config_db.create_entry("INTERFACE", port_name(i), fvs) + config_db.create_entry("INTERFACE", "{}|{}".format(port_name(i), port_ipprefix(i)), fvs) + dvs.runcmd("config interface startup " + port_name(i)) + dvs.runcmd("arp -s {} {}".format(peer_ip(i), port_mac(i))) + assert dvs.servers[i].runcmd("ip link set down dev eth0") == 0 + assert dvs.servers[i].runcmd("ip link set up dev eth0") == 0 + + app_db = dvs.get_app_db() + ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + # Add first batch of routes with unique nexthop groups in AppDB + route_count = 0 + r = 0 + while route_count < MAX_ECMP_COUNT: + r += 1 + fmt = '{{0:0{}b}}'.format(MAX_PORT_COUNT) + binary = fmt.format(r) + # We need at least 2 ports for a nexthop group + if binary.count('1') <= 1: + continue + fvs = gen_nhg_fvs(binary) + route_ipprefix = gen_ipprefix(route_count) + ps.set(route_ipprefix, fvs) + route_count += 1 + + asic_db = dvs.get_asic_db() + + # Wait and check ASIC DB the count of nexthop groups used + asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", MAX_ECMP_COUNT) + asic_routes_count = len(asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY")) + + # Add second batch of routes with unique nexthop groups in AppDB + # Add more routes with new nexthop group in AppDBdd + route_ipprefix = gen_ipprefix(route_count) + base_ipprefix = route_ipprefix + base = route_count + route_count = 0 + while route_count < 10: + r += 1 + fmt = '{{0:0{}b}}'.format(MAX_PORT_COUNT) + binary = fmt.format(r) + # We need at least 2 ports for a nexthop group + if binary.count('1') <= 1: + continue + fvs = gen_nhg_fvs(binary) + route_ipprefix = gen_ipprefix(base + route_count) + ps.set(route_ipprefix, fvs) + route_count += 1 + last_ipprefix = route_ipprefix + + # Wait until we get expected routes and check ASIC DB on the count of nexthop groups used, and it should not increase + keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY", asic_routes_count + 10) + asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", MAX_ECMP_COUNT) + + # Check the route points to next hop group + # Note: no need to wait here + k = asic_route_exists(keys, "2.2.2.0/24") + assert k is not None + fvs = asic_route_nhg_fvs(k) + assert fvs is not None + + # Check the second batch does not point to next hop group + k = asic_route_exists(keys, base_ipprefix) + assert k is not None + fvs = asic_route_nhg_fvs(k) + assert not(fvs) + + # Remove first batch of routes with unique nexthop groups in AppDB + route_count = 0 + r = 0 + while route_count < MAX_ECMP_COUNT: + r += 1 + fmt = '{{0:0{}b}}'.format(MAX_PORT_COUNT) + binary = fmt.format(r) + # We need at least 2 ports for a nexthop group + if binary.count('1') <= 1: + continue + route_ipprefix = gen_ipprefix(route_count) + ps._del(route_ipprefix) + route_count += 1 + + # Wait and check the second batch points to next hop group + # Check ASIC DB on the count of nexthop groups used, and it should not increase or decrease + asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", 10) + keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY") + k = asic_route_exists(keys, base_ipprefix) + assert k is not None + fvs = asic_route_nhg_fvs(k) + assert fvs is not None + k = asic_route_exists(keys, last_ipprefix) + assert k is not None + fvs = asic_route_nhg_fvs(k) + assert fvs is not None