Skip to content

Commit

Permalink
Fix double transition to INSPECTFAIL on aborting in-band inspection
Browse files Browse the repository at this point in the history
Both the driver and the conductor code try to transition the node to
INSPECTFAIL, with the 2nd attempt failing. Rework the driver code to
only do implementation-specific clean-up. Also safeguard the conductor
code against this case.

Change-Id: Ie1c64b4807ecf29fa0da54501798d363675977c8
(cherry picked from commit 8a6b5eb)
  • Loading branch information
dtantsur committed Sep 25, 2024
1 parent 19de7ae commit 99c786d
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 16 deletions.
5 changes: 4 additions & 1 deletion ironic/conductor/inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,10 @@ def abort_inspection(task):
error=True,
user=task.context.user_id)
utils.wipe_token_and_url(task)
task.process_event('abort')
if task.node.provision_state != states.INSPECTFAIL:
task.process_event('abort')
else:
task.node.save()
LOG.info('Successfully aborted inspection of node %(node)s',
{'node': node.uuid})

Expand Down
8 changes: 4 additions & 4 deletions ironic/drivers/modules/inspector/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ def abort(self, task):
:param task: a task from TaskManager.
"""
error = _("By request, the inspection operation has been aborted")
inspect_utils.clear_lookup_addresses(task.node)
common.inspection_error_handler(task, error, raise_exc=False,
clean_up=True)
if inspect_utils.clear_lookup_addresses(task.node):
task.node.save()

common.clean_up(task, finish=False)

def continue_inspection(self, task, inventory, plugin_data):
"""Continue in-band hardware inspection.
Expand Down
42 changes: 31 additions & 11 deletions ironic/tests/unit/conductor/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8296,14 +8296,14 @@ def test_remove_node_traits_node_trait_not_found(self):


@mgr_utils.mock_record_keepalive
@mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True)
@mock.patch('ironic.conductor.task_manager.acquire', autospec=True)
class DoNodeInspectAbortTestCase(mgr_utils.CommonMixIn,
mgr_utils.ServiceSetUpMixin,
db_base.DbTestCase):
@mock.patch.object(inspection, 'LOG', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True)
@mock.patch('ironic.conductor.task_manager.acquire', autospec=True)
def test_do_inspect_abort_interface_not_support(self, mock_acquire,
mock_abort, mock_log):
def test_do_inspect_abort_interface_not_support(self, mock_log,
mock_acquire, mock_abort):
node = obj_utils.create_test_node(self.context,
driver='fake-hardware',
provision_state=states.INSPECTWAIT)
Expand All @@ -8321,10 +8321,9 @@ def test_do_inspect_abort_interface_not_support(self, mock_acquire,
self.assertTrue(mock_log.error.called)

@mock.patch.object(inspection, 'LOG', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True)
@mock.patch('ironic.conductor.task_manager.acquire', autospec=True)
def test_do_inspect_abort_interface_return_failed(self, mock_acquire,
mock_abort, mock_log):
def test_do_inspect_abort_interface_return_failed(self, mock_log,
mock_acquire,
mock_abort):
mock_abort.side_effect = exception.IronicException('Oops')
self._start_service()
node = obj_utils.create_test_node(self.context,
Expand All @@ -8340,8 +8339,6 @@ def test_do_inspect_abort_interface_return_failed(self, mock_acquire,
self.assertTrue(mock_log.exception.called)
self.assertIn('Failed to abort inspection', node.last_error)

@mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True)
@mock.patch('ironic.conductor.task_manager.acquire', autospec=True)
def test_do_inspect_abort_succeeded(self, mock_acquire, mock_abort):
self._start_service()
node = obj_utils.create_test_node(
Expand All @@ -8355,7 +8352,30 @@ def test_do_inspect_abort_succeeded(self, mock_acquire, mock_abort):
self.service.do_provisioning_action(self.context, task.node.uuid,
"abort")
node.refresh()
self.assertEqual('inspect failed', node.provision_state)
self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertIn('Inspection was aborted', node.last_error)
self.assertNotIn('agent_url', node.driver_internal_info)
self.assertNotIn('agent_secret_token', node.driver_internal_info)

def test_do_inspect_abort_state_set_by_driver(self, mock_acquire,
mock_abort):
def abort(driver, task):
task.process_event('abort')

mock_abort.side_effect = abort
self._start_service()
node = obj_utils.create_test_node(
self.context,
driver='fake-hardware',
provision_state=states.INSPECTWAIT,
driver_internal_info={'agent_url': 'url',
'agent_secret_token': 'token'})
task = task_manager.TaskManager(self.context, node.uuid)
mock_acquire.side_effect = self._get_acquire_side_effect(task)
self.service.do_provisioning_action(self.context, task.node.uuid,
"abort")
node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertIn('Inspection was aborted', node.last_error)
self.assertNotIn('agent_url', node.driver_internal_info)
self.assertNotIn('agent_secret_token', node.driver_internal_info)
Expand Down
40 changes: 40 additions & 0 deletions ironic/tests/unit/drivers/modules/inspector/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,43 @@ def test(self, mock_inspection_hooks, mock_tear_down):
self.assertEqual(states.INSPECTING, task.node.provision_state)

self.assertIsNone(result)


@mock.patch.object(common, 'tear_down_managed_boot', autospec=True)
class AbortInspectionTestCase(db_base.DbTestCase):
def setUp(self):
super().setUp()
CONF.set_override('enabled_inspect_interfaces',
['agent', 'no-inspect'])
self.node = obj_utils.create_test_node(
self.context,
driver_internal_info={
'lookup_bmc_addresses': [42],
},
inspect_interface='agent',
provision_state=states.INSPECTWAIT)
self.iface = inspector.AgentInspect()

def test_success(self, mock_tear_down):
mock_tear_down.return_value = None
with task_manager.acquire(self.context, self.node.id) as task:
self.iface.abort(task)
mock_tear_down.assert_called_once_with(task)

self.node.refresh()
# Keep the state - it will be changed on the conductor level
self.assertEqual(states.INSPECTWAIT, self.node.provision_state)
self.assertNotIn('lookup_bmc_addresses',
self.node.driver_internal_info)

def test_cleanup_failed(self, mock_tear_down):
mock_tear_down.return_value = ['boom']
with task_manager.acquire(self.context, self.node.id) as task:
self.iface.abort(task)
mock_tear_down.assert_called_once_with(task)

self.node.refresh()
self.assertEqual(states.INSPECTFAIL, self.node.provision_state)
self.assertIn('boom', self.node.last_error)
self.assertNotIn('lookup_bmc_addresses',
self.node.driver_internal_info)
5 changes: 5 additions & 0 deletions releasenotes/notes/inspect-abort-8add5e6e6b599357.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
fixes:
- |
Fixes aborting in-band inspection. Previously, it would fail with
``Can not transition from state 'inspect failed' on event 'abort'``.

0 comments on commit 99c786d

Please sign in to comment.