Skip to content

Commit

Permalink
Separate syncd service from swss service (#2051)
Browse files Browse the repository at this point in the history
* [swss.sh] refactor ssh service script code

- Move checks and waits to helper functions.
- Remove early returns from code stream

Signed-off-by: Ying Xie <ying.xie@microsoft.com>

* [swss.sh] Add debug log for service state changes

Signed-off-by: Ying Xie <ying.xie@microsoft.com>

* [syncd] Separate out syncd service from swss service

Still make them start/stop/restart synchronously so existing scripts
continue working.

Signed-off-by: Ying Xie <ying.xie@microsoft.com>

* Remove extra 'After' in swss service and remove syncd docker warm boot code

Syncd warm boot needs more thinking, we can put it back once the work
flow has been defined and ready for coding/testing.

* [syncd] syncd start/stop/restart shouldn't affect swss state

Semi-detach syncd service state change from swss:

- swss state change still chase syncd service to follow except warm boot
- syncd state change will only affect itself.

Signed-off-by: Ying Xie <ying.xie@microsoft.com>

* add missing '{'
  • Loading branch information
yxieca authored Sep 24, 2018
1 parent 715806c commit cfe01f1
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 57 deletions.
3 changes: 2 additions & 1 deletion files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,9 @@ sudo LANG=C chroot $FILESYSTEM_ROOT fuser -km /sys || true
sudo LANG=C chroot $FILESYSTEM_ROOT umount -lf /sys
{% endif %}

# Copy swss service script
# Copy swss and syncd service script
sudo LANG=C cp $SCRIPTS_DIR/swss.sh $FILESYSTEM_ROOT/usr/local/bin/swss.sh
sudo LANG=C cp $SCRIPTS_DIR/syncd.sh $FILESYSTEM_ROOT/usr/local/bin/syncd.sh

# Copy systemd timer configuration
# It implements delayed start of services
Expand Down
5 changes: 0 additions & 5 deletions files/build_templates/swss.service.j2
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@ Requires=nps-modules-4.9.0-7-amd64.service
{% endif %}
After=database.service updategraph.service
After=interfaces-config.service
{% if sonic_asic_platform == 'broadcom' %}
After=opennsl-modules-4.9.0-7-amd64.service
{% elif sonic_asic_platform == 'nephos' %}
After=nps-modules-4.9.0-7-amd64.service
{% endif %}

[Service]
User=root
Expand Down
24 changes: 24 additions & 0 deletions files/build_templates/syncd.service.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[Unit]
Description=syncd service
Requires=database.service updategraph.service
{% if sonic_asic_platform == 'broadcom' %}
Requires=opennsl-modules-4.9.0-7-amd64.service
{% elif sonic_asic_platform == 'nephos' %}
Requires=nps-modules-4.9.0-7-amd64.service
{% endif %}
After=database.service updategraph.service
After=interfaces-config.service
{% if sonic_asic_platform == 'broadcom' %}
After=opennsl-modules-4.9.0-7-amd64.service
{% elif sonic_asic_platform == 'nephos' %}
After=nps-modules-4.9.0-7-amd64.service
{% endif %}

[Service]
User=root
Environment=sonic_asic_platform={{ sonic_asic_platform }}
ExecStart=/usr/local/bin/syncd.sh start
ExecStop=/usr/local/bin/syncd.sh stop

[Install]
WantedBy=multi-user.target
144 changes: 93 additions & 51 deletions files/scripts/swss.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,56 @@
#!/bin/bash

start() {
SERVICE="swss"
PEER="syncd"
DEBUGLOG="/tmp/swss-syncd-debug.log"
LOCKFILE="/tmp/swss-syncd-lock"

function debug()
{
/bin/echo `date` "- $1" >> ${DEBUGLOG}
}

function lock_service_state_change()
{
debug "Locking ${LOCKFILE} from ${SERVICE} service"

exec {LOCKFD}>${LOCKFILE}
/usr/bin/flock -x ${LOCKFD}
trap "/usr/bin/flock -u ${LOCKFD}" 0 2 3 15

debug "Locked ${LOCKFILE} (${LOCKFD}) from ${SERVICE} service"
}

function unlock_service_state_change()
{
debug "Unlocking ${LOCKFILE} (${LOCKFD}) from ${SERVICE} service"
/usr/bin/flock -u ${LOCKFD}
}

function check_warm_boot()
{
SYSTEM_WARM_START=`/usr/bin/redis-cli -n 4 hget "WARM_RESTART|system" enable`
SERVICE_WARM_START=`/usr/bin/redis-cli -n 4 hget "WARM_RESTART|${SERVICE}" enable`
if [[ x"$SYSTEM_WARM_START" == x"true" ]] || [[ x"$SERVICE_WARM_START" == x"true" ]]; then
WARM_BOOT="true"
else
WARM_BOOT="false"
fi
}

function validate_restart_count()
{
if [[ x"$WARM_BOOT" == x"true" ]]; then
RESTART_COUNT=`/usr/bin/redis-cli -n 6 hget "WARM_RESTART_TABLE|orchagent" restart_count`
# We have to make sure db data has not been flushed.
if [[ -z "$RESTART_COUNT" ]]; then
WARM_BOOT="false"
fi
fi
}

function wait_for_database_service()
{
# Wait for redis server start before database clean
until [[ $(/usr/bin/docker exec database redis-cli ping | grep -c PONG) -gt 0 ]];
do sleep 1;
Expand All @@ -10,66 +60,58 @@ start() {
until [[ $(/usr/bin/docker exec database redis-cli -n 4 GET "CONFIG_DB_INITIALIZED") ]];
do sleep 1;
done
}

SYSTEM_WARM_START=`/usr/bin/docker exec database redis-cli -n 4 HGET "WARM_RESTART|system" enable`
SWSS_WARM_START=`/usr/bin/docker exec database redis-cli -n 4 HGET "WARM_RESTART|swss" enable`
# if warm start enabled, just do swss docker start.
# Don't flush DB or try to start other modules.
if [[ "$SYSTEM_WARM_START" == "true" ]] || [[ "$SWSS_WARM_START" == "true" ]]; then
RESTART_COUNT=`redis-cli -n 6 hget "WARM_RESTART_TABLE|orchagent" restart_count`
# We have to make sure db data has not been flushed.
if [[ -n "$RESTART_COUNT" ]]; then
/usr/bin/swss.sh start
/usr/bin/swss.sh attach
return 0
fi
fi
start() {
debug "Starting ${SERVICE} service..."

lock_service_state_change

wait_for_database_service
check_warm_boot
validate_restart_count

# Flush DB
/usr/bin/docker exec database redis-cli -n 0 FLUSHDB
/usr/bin/docker exec database redis-cli -n 1 FLUSHDB
/usr/bin/docker exec database redis-cli -n 2 FLUSHDB
/usr/bin/docker exec database redis-cli -n 5 FLUSHDB
/usr/bin/docker exec database redis-cli -n 6 FLUSHDB

# platform specific tasks
if [ x$sonic_asic_platform == x'mellanox' ]; then
FAST_BOOT=1
/usr/bin/mst start
/usr/bin/mlnx-fw-upgrade.sh
/etc/init.d/sxdkernel start
/sbin/modprobe i2c-dev
/etc/mlnx/mlnx-hw-management start
elif [ x$sonic_asic_platform == x'cavium' ]; then
/etc/init.d/xpnet.sh start
debug "Warm boot flag: ${SERVICE} ${WARM_BOOT}."

# Don't flush DB during warm boot
if [[ x"$WARM_BOOT" != x"true" ]]; then
/usr/bin/docker exec database redis-cli -n 0 FLUSHDB
/usr/bin/docker exec database redis-cli -n 2 FLUSHDB
/usr/bin/docker exec database redis-cli -n 5 FLUSHDB
/usr/bin/docker exec database redis-cli -n 6 FLUSHDB
fi

# start swss and syncd docker
/usr/bin/swss.sh start
/usr/bin/syncd.sh start
/usr/bin/swss.sh attach
# start service docker
/usr/bin/${SERVICE}.sh start
debug "Started ${SERVICE} service..."

# Unlock has to happen before reaching out to peer service
unlock_service_state_change

if [[ x"$WARM_BOOT" != x"true" ]]; then
/bin/systemctl start ${PEER}
fi
/usr/bin/${SERVICE}.sh attach
}

stop() {
/usr/bin/swss.sh stop
debug "Stopping ${SERVICE} service..."

SYSTEM_WARM_START=`redis-cli -n 4 hget "WARM_RESTART|system" enable`
SWSS_WARM_START=`redis-cli -n 4 hget "WARM_RESTART|swss" enable`
# if warm start enabled, just stop swss docker, then return
if [[ "$SYSTEM_WARM_START" == "true" ]] || [[ "$SWSS_WARM_START" == "true" ]]; then
return 0
fi
[[ -f ${LOCKFILE} ]] || /usr/bin/touch ${LOCKFILE}

lock_service_state_change
check_warm_boot
debug "Warm boot flag: ${SERVICE} ${WARM_BOOT}."

/usr/bin/${SERVICE}.sh stop
debug "Stopped ${SERVICE} service..."

/usr/bin/syncd.sh stop
# Unlock has to happen before reaching out to peer service
unlock_service_state_change

# platform specific tasks
if [ x$sonic_asic_platform == x'mellanox' ]; then
/etc/mlnx/mlnx-hw-management stop
/etc/init.d/sxdkernel stop
/usr/bin/mst stop
elif [ x$sonic_asic_platform == x'cavium' ]; then
/etc/init.d/xpnet.sh stop
/etc/init.d/xpnet.sh start
# if warm start enabled or peer lock exists, don't stop peer service docker
if [[ x"$WARM_BOOT" != x"true" ]]; then
/bin/systemctl stop ${PEER}
fi
}

Expand Down
123 changes: 123 additions & 0 deletions files/scripts/syncd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/bin/bash

SERVICE="syncd"
PEER="swss"
DEBUGLOG="/tmp/swss-syncd-debug.log"
LOCKFILE="/tmp/swss-syncd-lock"

function debug()
{
/bin/echo `date` "- $1" >> ${DEBUGLOG}
}

function lock_service_state_change()
{
debug "Locking ${LOCKFILE} from ${SERVICE} service"

exec {LOCKFD}>${LOCKFILE}
/usr/bin/flock -x ${LOCKFD}
trap "/usr/bin/flock -u ${LOCKFD}" 0 2 3 15

debug "Locked ${LOCKFILE} (${LOCKFD}) from ${SERVICE} service"
}

function unlock_service_state_change()
{
debug "Unlocking ${LOCKFILE} (${LOCKFD}) from ${SERVICE} service"
/usr/bin/flock -u ${LOCKFD}
}

function check_warm_boot()
{
SYSTEM_WARM_START=`/usr/bin/redis-cli -n 4 hget "WARM_RESTART|system" enable`
# SYSTEM_WARM_START could be empty, always make WARM_BOOT meaningful.
if [[ x"$SYSTEM_WARM_START" == x"true" ]]; then
WARM_BOOT="true"
else
WARM_BOOT="false"
fi
}

function wait_for_database_service()
{
# Wait for redis server start before database clean
until [[ $(/usr/bin/docker exec database redis-cli ping | grep -c PONG) -gt 0 ]];
do sleep 1;
done

# Wait for configDB initialization
until [[ $(/usr/bin/docker exec database redis-cli -n 4 GET "CONFIG_DB_INITIALIZED") ]];
do sleep 1;
done
}

start() {
debug "Starting ${SERVICE} service..."

lock_service_state_change

wait_for_database_service
check_warm_boot

debug "Warm boot flag: ${SERVICE} ${WARM_BOOT}."

# Don't flush DB during warm boot
if [[ x"$WARM_BOOT" != x"true" ]]; then
/usr/bin/docker exec database redis-cli -n 1 FLUSHDB

# platform specific tasks
if [ x$sonic_asic_platform == x'mellanox' ]; then
FAST_BOOT=1
/usr/bin/mst start
/usr/bin/mlnx-fw-upgrade.sh
/etc/init.d/sxdkernel start
/sbin/modprobe i2c-dev
/etc/mlnx/mlnx-hw-management start
elif [ x$sonic_asic_platform == x'cavium' ]; then
/etc/init.d/xpnet.sh start
fi
fi

# start service docker
/usr/bin/${SERVICE}.sh start
debug "Started ${SERVICE} service..."

unlock_service_state_change
/usr/bin/${SERVICE}.sh attach
}

stop() {
debug "Stopping ${SERVICE} service..."

lock_service_state_change
check_warm_boot
debug "Warm boot flag: ${SERVICE} ${WARM_BOOT}."

/usr/bin/${SERVICE}.sh stop
debug "Stopped ${SERVICE} service..."

# if warm start enabled, don't stop peer service docker
if [[ x"$WARM_BOOT" != x"true" ]]; then
# platform specific tasks
if [ x$sonic_asic_platform == x'mellanox' ]; then
/etc/mlnx/mlnx-hw-management stop
/etc/init.d/sxdkernel stop
/usr/bin/mst stop
elif [ x$sonic_asic_platform == x'cavium' ]; then
/etc/init.d/xpnet.sh stop
/etc/init.d/xpnet.sh start
fi
fi

unlock_service_state_change
}

case "$1" in
start|stop)
$1
;;
*)
echo "Usage: $0 {start|stop}"
exit 1
;;
esac

0 comments on commit cfe01f1

Please sign in to comment.