Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid error when JTs deleted while task manager running #6084

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion awx/main/scheduler/dag_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,13 @@ def _init_graph(self, workflow_job_or_jt):

wfn_by_id = dict()

for workflow_node in workflow_nodes.all():
# Intentionally prefetch related jobs and templates so that if they
# are deleted while task manager runs, it will not cause an error
if workflow_nodes.model == WorkflowJobNode:
node_qs = workflow_nodes.prefetch_related('job', 'unified_job_template').all()
else:
node_qs = workflow_nodes.prefetch_related('unified_job_template').all()
for workflow_node in node_qs:
wfn_by_id[workflow_node.id] = workflow_node
self.add_node(workflow_node)

Expand Down
20 changes: 20 additions & 0 deletions awx/main/tests/functional/models/test_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,3 +418,23 @@ def test_workflow_ancestors_recursion_prevention(organization):
)
# mostly, we just care that this assertion finishes in finite time
assert wfj.get_ancestor_workflows() == []


@pytest.mark.django_db
@pytest.mark.parametrize('related', ['template', 'job'])
def test_deleted_related_race_condition(related):
    """Check the DAG still reports failure after a related object is deleted.

    A workflow job with a single node (pointing at a job template and a
    failed job spawned from it) is turned into a ``WorkflowDAG``. Only after
    the DAG has been built is the object selected by ``related`` deleted
    from the database; ``has_workflow_failed()`` is then expected to return
    a truthy "failed" flag rather than raise.

    ``related`` is ``'template'`` to delete the job template, or ``'job'``
    to delete the spawned job.
    """
    workflow_job = WorkflowJob.objects.create()
    template = JobTemplate.objects.create(name='test-jt')
    # FIXME: running and successful status also results in the node being
    # considered failed when it has no UJT
    spawned_job = template.create_job(_eager_fields=dict(status='failed'))
    WorkflowJobNode.objects.create(
        workflow_job=workflow_job,
        unified_job_template=template,
        job=spawned_job,
    )

    # Build the graph *before* the deletion: the deletion happening after
    # this point is the race being simulated.
    dag = WorkflowDAG(workflow_job=workflow_job)

    # Map each parametrized case to the (model, primary key) to remove.
    deletion_targets = {
        'template': (JobTemplate, template.pk),
        'job': (Job, spawned_job.pk),
    }
    if related in deletion_targets:
        model, pk = deletion_targets[related]
        # Delete through a freshly fetched instance, not the local objects
        # created above.
        model.objects.get(pk=pk).delete()

    has_failed, reason = dag.has_workflow_failed()
    assert has_failed is True, reason