diff --git a/dockers/docker-fpm-frr/base_image_files/monit_bgp b/dockers/docker-fpm-frr/base_image_files/monit_bgp
index 5dbb794c346b..b9726c619582 100644
--- a/dockers/docker-fpm-frr/base_image_files/monit_bgp
+++ b/dockers/docker-fpm-frr/base_image_files/monit_bgp
@@ -6,6 +6,7 @@
 ## bgpd
 ## staticd
 ## bgpcfgd
+## bgpmon
 ###############################################################################
 check process zebra matching "/usr/lib/frr/zebra"
     if does not exist for 5 times within 5 cycles then alert
@@ -21,3 +22,6 @@ check process staticd matching "/usr/lib/frr/staticd"
 
 check process bgpcfgd matching "python /usr/local/bin/bgpcfgd"
     if does not exist for 5 times within 5 cycles then alert
+
+check process bgpmon matching "python /usr/local/bin/bgpmon"
+    if does not exist for 5 times within 5 cycles then alert
diff --git a/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2 b/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2
index 862886b8afae..de6879c43725 100644
--- a/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2
+++ b/dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2
@@ -84,6 +84,17 @@ stderr_logfile=syslog
 dependent_startup=true
 dependent_startup_wait_for=bgpd:running
 
+[program:bgpmon]
+command=/usr/local/bin/bgpmon
+priority=6
+autostart=false
+autorestart=false
+startsecs=0
+stdout_logfile=syslog
+stderr_logfile=syslog
+dependent_startup=true
+dependent_startup_wait_for=bgpd:running
+
 {% if DEVICE_METADATA.localhost.docker_routing_config_mode is defined and DEVICE_METADATA.localhost.docker_routing_config_mode == "unified" %}
 [program:vtysh_b]
 command=/usr/bin/vtysh -b
diff --git a/src/sonic-bgpcfgd/bgpmon.py b/src/sonic-bgpcfgd/bgpmon.py
new file mode 100755
index 000000000000..e4a9561142be
--- /dev/null
+++ b/src/sonic-bgpcfgd/bgpmon.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python2
+
+"""
+Description: bgpmon.py -- populating bgp related information in stateDB.
+    script is started by supervisord in bgp docker when the docker is started.
+
+    Initial creation of this daemon is to assist SNMP agent in obtaining the
+    BGP related information for its MIB support. The MIB that this daemon is
+    assisting is for the CiscoBgp4MIB (Neighbor state only). If there are other
+    BGP related items that need to be updated in a periodic manner in the
+    future, then more can be added into this process.
+
+    The script checks if there are any bgp activities by monitoring the bgp
+    frr.log file timestamp. If activity is detected, then it will request bgp
+    neighbor state via vtysh cli interface. This bgp activity monitoring is
+    done periodically (every 15 seconds). When triggered, it looks specifically
+    for the neighbor state in the json output of "show bgp summary json"
+    and updates the state DB for each neighbor accordingly.
+    In order to not disturb and hold on to the State DB access too long and
+    to remove the stale neighbors (neighbors that were there on the previous
+    get request but are no longer there in the current get request), a
+    "previous" neighbor dictionary will be kept and used to determine if there
+    is a need to perform an update or the peer is stale and has to be removed
+    from the state DB.
+"""
+import commands
+import json
+import os
+import sys
+import syslog
+import swsssdk
+import time
+
+PIPE_BATCH_MAX_COUNT = 50
+# Name used to tag the syslog messages emitted by this daemon
+THIS_MODULE = "bgpmon"
+
+class BgpStateGet():
+    def __init__(self):
+        # list peer_l stores the Neighbor peer Ip address
+        # dic peer_state stores the Neighbor peer state entries
+        # list new_peer_l stores the new snapshot of Neighbor peer ip address
+        # dic new_peer_state stores the new snapshot of Neighbor peer states
+        self.peer_l = []
+        self.peer_state = {}
+        self.new_peer_l = []
+        self.new_peer_state = {}
+        self.cached_timestamp = 0
+        self.db = swsssdk.SonicV2Connector()
+        self.db.connect(self.db.STATE_DB, False)
+        client = self.db.get_redis_client(self.db.STATE_DB)
+        self.pipe = client.pipeline()
+        self.db.delete_all_by_pattern(self.db.STATE_DB, "NEIGH_STATE_TABLE|*")
+
+    # A quick way to check if there is anything happening within BGP is to
+    # check whether its log file has any activity. This is done by comparing its
+    # modified timestamp against the cached timestamp that we keep; if there is
+    # a difference, activity is detected. In case the log file got wiped out,
+    # it defaults back to constant polling every 15 seconds
+    def bgp_activity_detected(self):
+        try:
+            timestamp = os.stat("/var/log/frr/frr.log").st_mtime
+        except (IOError, OSError):
+            return True
+        if timestamp != self.cached_timestamp:
+            self.cached_timestamp = timestamp
+            return True
+        return False
+
+    # Record every peer of one address family in the "new" snapshot
+    def update_new_peer_states(self, peer_dict):
+        # An address family entry without a "peers" section has nothing to add
+        for peer, info in peer_dict.get("peers", {}).items():
+            # A peer may show up under both address families; list it only once
+            if peer not in self.new_peer_state:
+                self.new_peer_l.append(peer)
+            self.new_peer_state[peer] = info["state"]
+
+    # Get a new snapshot of BGP neighbors and store them in the "new" location.
+    # On failure the previous snapshot is kept and the cached log timestamp is
+    # reset, so that the next cycle polls again instead of waiting for new
+    # activity in the log file.
+    def get_all_neigh_states(self):
+        cmd = "vtysh -c 'show bgp summary json'"
+        rc, output = commands.getstatusoutput(cmd)
+        if rc:
+            syslog.syslog(syslog.LOG_ERR, "*ERROR* Failed with rc:{} when execute: {}".format(rc, cmd))
+            self.cached_timestamp = 0
+            return
+
+        try:
+            peer_info = json.loads(output)
+        except ValueError as e:
+            # Unparsable output must not terminate the daemon
+            syslog.syslog(syslog.LOG_ERR, "*ERROR* Failed to parse output of: {}, reason {}".format(cmd, str(e)))
+            self.cached_timestamp = 0
+            return
+
+        # cmd ran successfully, safe to clean the "new" lists/dic for new snapshot
+        del self.new_peer_l[:]
+        self.new_peer_state.clear()
+        for key, value in peer_info.items():
+            if key in ("ipv4Unicast", "ipv6Unicast"):
+                self.update_new_peer_states(value)
+
+    # This method will take the caller's dictionary which contains the peer state operations
+    # that need to be applied to StateDB using the Redis pipeline.
+    # The data{} will be cleared at the end of this method before returning to caller.
+    def flush_pipe(self, data):
+        """Dump each entry in data{} into State DB via redis pipeline.
+
+        Args:
+            data: Neighbor state in dictionary format
+            {
+                'NEIGH_STATE_TABLE|ip_address_a': {'state':state},
+                'NEIGH_STATE_TABLE|ip_address_b': {'state':state},
+                'NEIGH_STATE_TABLE|ip_address_c': {'state':state},
+                'NEIGH_STATE_TABLE|ip_address_x': None,
+                'NEIGH_STATE_TABLE|ip_address_z': None
+                ...
+            }
+        """
+        for key, value in data.items():
+            if value is None:
+                # delete case
+                self.pipe.delete(key)
+            else:
+                # Add or Modify case
+                self.pipe.hmset(key, value)
+        self.pipe.execute()
+        data.clear()
+
+    # Push the difference between the previous and the new snapshot to StateDB
+    def update_neigh_states(self):
+        data = {}
+        for peer in self.new_peer_l:
+            key = "NEIGH_STATE_TABLE|%s" % peer
+            state = self.new_peer_state[peer]
+            if peer in self.peer_l:
+                # only update the entry if state changed
+                if self.peer_state[peer] != state:
+                    # state changed. Update state DB for this entry
+                    data[key] = {'state':state}
+                    self.peer_state[peer] = state
+                # remove this neighbor from old list since it is accounted for
+                self.peer_l.remove(peer)
+            else:
+                # New neighbor found case. Add to dictionary and state DB
+                data[key] = {'state':state}
+                self.peer_state[peer] = state
+            if len(data) >= PIPE_BATCH_MAX_COUNT:
+                self.flush_pipe(data)
+        # Check for stale state entries to be cleaned up
+        while self.peer_l:
+            # remove this from the stateDB and the current neighbor state entry
+            peer = self.peer_l.pop(0)
+            del_key = "NEIGH_STATE_TABLE|%s" % peer
+            data[del_key] = None
+            del self.peer_state[peer]
+            if len(data) >= PIPE_BATCH_MAX_COUNT:
+                self.flush_pipe(data)
+        # If anything in the pipeline not yet flushed, flush them now
+        if data:
+            self.flush_pipe(data)
+        # Save the new List
+        self.peer_l = self.new_peer_l[:]
+
+def main():
+    print("bgpmon service started")
+
+    try:
+        bgp_state_get = BgpStateGet()
+    except Exception as e:
+        syslog.syslog(syslog.LOG_ERR, "{}: error exit 1, reason {}".format(THIS_MODULE, str(e)))
+        sys.exit(1)
+
+    # periodically obtain the new neighbor information and update if necessary
+    while True:
+        time.sleep(15)
+        if bgp_state_get.bgp_activity_detected():
+            bgp_state_get.get_all_neigh_states()
+            bgp_state_get.update_neigh_states()
+
+if __name__ == '__main__':
+    main()
diff --git a/src/sonic-bgpcfgd/setup.py b/src/sonic-bgpcfgd/setup.py
index fae1c313850e..2f485592c712 100755
--- a/src/sonic-bgpcfgd/setup.py
+++ b/src/sonic-bgpcfgd/setup.py
@@ -10,6 +10,11 @@
       url='https://github.com/Azure/sonic-buildimage',
       packages=setuptools.find_packages(),
       scripts=['bgpcfgd'],
+      entry_points={
+          'console_scripts': [
+              'bgpmon = bgpmon:main',
+          ]
+      },
       install_requires=['jinja2>=2.10', 'netaddr', 'pyyaml'],
       setup_requires=['pytest-runner', 'pytest'],
 )