diff --git a/.gitignore b/.gitignore index 069e663c1966..98ef49099546 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ src/sonic-device-data/src/device/ src/sonic-device-data/src/debian/ src/supervisor/* !src/supervisor/Makefile +!src/supervisor/patch/ src/thrift/* !src/thrift/Makefile diff --git a/rules/supervisor.mk b/rules/supervisor.mk index cedacd96373e..7e7466bbad86 100644 --- a/rules/supervisor.mk +++ b/rules/supervisor.mk @@ -1,5 +1,9 @@ # supervisor package -SUPERVISOR = python-supervisor_3.3.2-1_all.deb +SUPERVISOR_VERSION = 3.3.2 + +export SUPERVISOR_VERSION + +SUPERVISOR = python-supervisor_$(SUPERVISOR_VERSION)-1_all.deb $(SUPERVISOR)_SRC_PATH = $(SRC_PATH)/supervisor SONIC_MAKE_DEBS += $(SUPERVISOR) diff --git a/src/supervisor/Makefile b/src/supervisor/Makefile index 778903194ffa..3f989c3d0173 100644 --- a/src/supervisor/Makefile +++ b/src/supervisor/Makefile @@ -2,7 +2,7 @@ SHELL = /bin/bash .SHELLFLAGS += -e -MAIN_TARGET = python-supervisor_3.3.2-1_all.deb +MAIN_TARGET = python-supervisor_$(SUPERVISOR_VERSION)-1_all.deb $(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% : # Remove any stale files @@ -11,7 +11,15 @@ $(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% : # Clone supervisor repo git clone https://github.com/Supervisor/supervisor.git pushd ./supervisor - git checkout -f 3.3.2 + + # Reset HEAD to the commit of the proper tag + # NOTE: Using "git checkout " here detaches our HEAD, + # which stg doesn't like, so we use this method instead + git reset --hard $(SUPERVISOR_VERSION) + + # Apply patches + stg init + stg import -s ../patch/series # Build Python and Debian package python setup.py --command-packages=stdeb.command bdist_deb diff --git a/src/supervisor/patch/0001-Prevent-excessive-hanging-delays-in-process-state-tr.patch b/src/supervisor/patch/0001-Prevent-excessive-hanging-delays-in-process-state-tr.patch new file mode 100644 index 000000000000..c0263e9baf10 --- /dev/null +++ b/src/supervisor/patch/0001-Prevent-excessive-hanging-delays-in-process-state-tr.patch @@ -0,0 +1,83 @@ +From bac496af4c2051b3c15038e06533f9094b1c3604 Mon Sep 17 00:00:00 2001 +From: Joe LeVeque +Date: Tue, 16 Jan 2018 20:42:13 +0000 +Subject: [PATCH 1/2] Prevent excessive hanging/delays in process state + transitions if the system time rolls backward + +--- + supervisor/process.py | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/supervisor/process.py b/supervisor/process.py +index f9ddcd9..216372f 100644 +--- a/supervisor/process.py ++++ b/supervisor/process.py +@@ -357,6 +357,12 @@ class Subprocess: + """ Log a 'waiting for x to stop' message with throttling. """ + if self.state == ProcessStates.STOPPING: + now = time.time() ++ ++ # If the system clock has moved backward, reset ++ # self.laststopreport to current system time ++ if now < self.laststopreport: ++ self.laststopreport = now; ++ + if now > (self.laststopreport + 2): # every 2 seconds + self.config.options.logger.info( + 'waiting for %s to stop' % self.config.name) +@@ -604,12 +610,27 @@ class Subprocess: + # STOPPED -> STARTING + self.spawn() + elif state == ProcessStates.BACKOFF: ++ if self.delay > 0 and now < self.delay - self.backoff: ++ # The system clock appears to have moved backward ++ # Reset self.delay accordingly ++ self.delay = now + self.backoff ++ + if self.backoff <= self.config.startretries: + if now > self.delay: + # BACKOFF -> STARTING + self.spawn() + + if state == ProcessStates.STARTING: ++ if now < self.laststart: ++ # The system clock appears to have moved backward ++ # Reset self.laststart to current system time ++ self.laststart = now; ++ ++ if self.delay > 0 and now < self.delay - self.config.startsecs: ++ # The system clock appears to have moved backward ++ # Reset self.delay accordingly ++ self.delay = now + self.config.startsecs ++ + if now - self.laststart > self.config.startsecs: + # STARTING -> RUNNING if the proc has started + # successfully and it has stayed up for at least +@@ -633,6 +654,11 @@ class Subprocess: + logger.info('gave up: %s %s' % (self.config.name, msg)) + + elif state == ProcessStates.STOPPING: ++ if self.delay > 0 and now < self.delay - self.config.stopwaitsecs: ++ # The system clock appears to have moved backward ++ # Reset self.delay accordingly ++ self.delay = now + self.config.stopwaitsecs ++ + time_left = self.delay - now + if time_left <= 0: + # kill processes which are taking too long to stop with a final +@@ -805,6 +831,12 @@ class EventListenerPool(ProcessGroupBase): + if dispatch_capable: + if self.dispatch_throttle: + now = time.time() ++ ++ # If the system clock has moved backward, reset ++ # self.last_dispatch to current system time ++ if now < self.last_dispatch: ++ self.last_dispatch = now; ++ + if now - self.last_dispatch < self.dispatch_throttle: + return + self.dispatch() +-- +2.1.4 + diff --git a/src/supervisor/patch/0002-Add-unit-test-cases-to-ensure-state-transitions-work.patch b/src/supervisor/patch/0002-Add-unit-test-cases-to-ensure-state-transitions-work.patch new file mode 100644 index 000000000000..cb01665ba80b --- /dev/null +++ b/src/supervisor/patch/0002-Add-unit-test-cases-to-ensure-state-transitions-work.patch @@ -0,0 +1,184 @@ +From 43b6f3ee93593f666af2c6a7af897bf3b5ee762d Mon Sep 17 00:00:00 2001 +From: Joe LeVeque +Date: Wed, 17 Jan 2018 22:27:56 +0000 +Subject: [PATCH 2/2] Add unit test cases to ensure state transitions work if + system time has rolled backward + +--- + supervisor/tests/test_process.py | 146 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 146 insertions(+) + +diff --git a/supervisor/tests/test_process.py b/supervisor/tests/test_process.py +index 7f9bab2..ea16315 100644 +--- a/supervisor/tests/test_process.py ++++ b/supervisor/tests/test_process.py +@@ -736,6 +736,40 @@ class SubprocessTests(unittest.TestCase): + instance.stop_report() + self.assertEqual(len(options.logger.data), 1) # throttled + ++ def test_stop_report_laststopreport_in_future(self): ++ future_time = time.time() + 3600 # 1 hour into the future ++ options = DummyOptions() ++ config = DummyPConfig(options, 'test', '/test') ++ instance = self._makeOne(config) ++ instance.pid = 11 ++ dispatcher = DummyDispatcher(writable=True) ++ instance.dispatchers = {'foo':dispatcher} ++ from supervisor.states import ProcessStates ++ instance.state = ProcessStates.STOPPING ++ instance.laststopreport = future_time ++ ++ # This iteration of stop_report() should reset instance.laststopreport ++ # to the current time ++ instance.stop_report() ++ ++ # No logging should have taken place ++ self.assertEqual(len(options.logger.data), 0) ++ ++ # Ensure instance.laststopreport has rolled backward ++ self.assertTrue(instance.laststopreport < future_time) ++ ++ # Sleep for 2 seconds ++ time.sleep(2) ++ ++ # This iteration of stop_report() should actaully trigger the report ++ instance.stop_report() ++ ++ self.assertEqual(len(options.logger.data), 1) ++ self.assertEqual(options.logger.data[0], 'waiting for test to stop') ++ self.assertNotEqual(instance.laststopreport, 0) ++ instance.stop_report() ++ self.assertEqual(len(options.logger.data), 1) # throttled ++ + def test_give_up(self): + options = DummyOptions() + config = DummyPConfig(options, 'test', '/test') +@@ -1402,6 +1436,92 @@ class SubprocessTests(unittest.TestCase): + event = L[0] + self.assertEqual(event.__class__, events.ProcessStateRunningEvent) + ++ def test_transition_starting_to_running_laststart_in_future(self): ++ from supervisor import events ++ L = [] ++ events.subscribe(events.ProcessStateEvent, lambda x: L.append(x)) ++ from supervisor.states import ProcessStates ++ ++ future_time = time.time() + 3600 # 1 hour into the future ++ options = DummyOptions() ++ test_startsecs = 2 ++ ++ # this should go from STARTING to RUNNING via transition() ++ pconfig = DummyPConfig(options, 'process', 'process','/bin/process', ++ startsecs=test_startsecs) ++ process = self._makeOne(pconfig) ++ process.backoff = 1 ++ process.delay = 1 ++ process.system_stop = False ++ process.laststart = future_time ++ process.pid = 1 ++ process.stdout_buffer = 'abc' ++ process.stderr_buffer = 'def' ++ process.state = ProcessStates.STARTING ++ ++ # This iteration of transition() should reset process.laststart ++ # to the current time ++ process.transition() ++ ++ # Process state should still be STARTING ++ self.assertEqual(process.state, ProcessStates.STARTING) ++ ++ # Ensure process.laststart has rolled backward ++ self.assertTrue(process.laststart < future_time) ++ ++ # Sleep for (startsecs + 1) ++ time.sleep(test_startsecs + 1) ++ ++ # This iteration of transition() should actaully trigger the state ++ # transition to RUNNING ++ process.transition() ++ ++ # this implies RUNNING ++ self.assertEqual(process.backoff, 0) ++ self.assertEqual(process.delay, 0) ++ self.assertFalse(process.system_stop) ++ self.assertEqual(process.state, ProcessStates.RUNNING) ++ self.assertEqual(options.logger.data[0], ++ 'success: process entered RUNNING state, process has ' ++ 'stayed up for > than {} seconds (startsecs)'.format(test_startsecs)) ++ self.assertEqual(len(L), 1) ++ event = L[0] ++ self.assertEqual(event.__class__, events.ProcessStateRunningEvent) ++ ++ def test_transition_backoff_to_starting_delay_in_future(self): ++ from supervisor import events ++ L = [] ++ events.subscribe(events.ProcessStateEvent, lambda x: L.append(x)) ++ from supervisor.states import ProcessStates, SupervisorStates ++ ++ future_time = time.time() + 3600 # 1 hour into the future ++ options = DummyOptions() ++ ++ pconfig = DummyPConfig(options, 'process', 'process','/bin/process') ++ process = self._makeOne(pconfig) ++ process.laststart = 1 ++ process.delay = future_time ++ process.backoff = 0 ++ process.state = ProcessStates.BACKOFF ++ ++ # This iteration of transition() should reset process.delay ++ # to the current time ++ process.transition() ++ ++ # Process state should still be BACKOFF ++ self.assertEqual(process.state, ProcessStates.BACKOFF) ++ ++ # Ensure process.delay has rolled backward ++ self.assertTrue(process.delay < future_time) ++ ++ # This iteration of transition() should actaully trigger the state ++ # transition to STARTING ++ process.transition() ++ ++ self.assertEqual(process.state, ProcessStates.STARTING) ++ self.assertEqual(len(L), 1) ++ self.assertEqual(L[0].__class__, events.ProcessStateStartingEvent) ++ + def test_transition_backoff_to_fatal(self): + from supervisor import events + L = [] +@@ -2033,6 +2153,32 @@ class EventListenerPoolTests(ProcessGroupBaseTests): + self.assertEqual(process1.listener_state, EventListenerStates.BUSY) + self.assertEqual(process1.event, event) + ++ def test_transition_event_proc_running_with_dispatch_throttle_last_dispatch_in_future(self): ++ future_time = time.time() + 3600 # 1 hour into the future ++ options = DummyOptions() ++ from supervisor.states import ProcessStates ++ pconfig1 = DummyPConfig(options, 'process1', 'process1','/bin/process1') ++ process1 = DummyProcess(pconfig1, state=ProcessStates.RUNNING) ++ gconfig = DummyPGroupConfig(options, pconfigs=[pconfig1]) ++ pool = self._makeOne(gconfig) ++ pool.dispatch_throttle = 5 ++ pool.last_dispatch = future_time ++ pool.processes = {'process1': process1} ++ event = DummyEvent() ++ from supervisor.states import EventListenerStates ++ process1.listener_state = EventListenerStates.READY ++ class DummyGroup: ++ config = gconfig ++ process1.group = DummyGroup ++ pool._acceptEvent(event) ++ pool.transition() ++ ++ self.assertEqual(process1.transitioned, True) ++ self.assertEqual(pool.event_buffer, [event]) # not popped ++ ++ # Ensure pool.last_dispatch has been rolled backward ++ self.assertTrue(pool.last_dispatch < future_time) ++ + def test__dispatchEvent_notready(self): + options = DummyOptions() + from supervisor.states import ProcessStates +-- +2.1.4 + diff --git a/src/supervisor/patch/series b/src/supervisor/patch/series new file mode 100644 index 000000000000..d141fddf3301 --- /dev/null +++ b/src/supervisor/patch/series @@ -0,0 +1,2 @@ +0001-Prevent-excessive-hanging-delays-in-process-state-tr.patch +0002-Add-unit-test-cases-to-ensure-state-transitions-work.patch