Skip to content

Commit 9ed5d1d

Browse files
committed
controller: Stop panicking when unable to create pod.
Currently the controller panic if it encounters an error while trying to create a pod/job for an instance, thus causing an impact far wider than just the failing instance. This should only have impact on the said instance, so as a first step, this commit stop the controller from panicking in this case and just output a log entry. Signed-off-by: Nicolas Belouin <nicolas.belouin@suse.com>
1 parent db62ed8 commit 9ed5d1d

File tree

2 files changed

+49
-8
lines changed

2 files changed

+49
-8
lines changed

controller/src/util/instance_action.rs

+44-7
Original file line numberDiff line numberDiff line change
@@ -419,20 +419,27 @@ pub async fn handle_instance_change(
419419
}
420420
};
421421
if let Some(broker_spec) = &configuration.spec.broker_spec {
422-
match broker_spec {
422+
let instance_change_result = match broker_spec {
423423
BrokerSpec::BrokerPodSpec(p) => {
424424
handle_instance_change_pod(instance, p, action, kube_interface).await
425425
}
426426
BrokerSpec::BrokerJobSpec(j) => {
427427
handle_instance_change_job(
428-
instance.clone(),
428+
instance,
429429
*configuration.metadata.generation.as_ref().unwrap(),
430430
j,
431431
action,
432432
kube_interface,
433433
)
434434
.await
435435
}
436+
};
437+
match instance_change_result {
438+
Ok(_) => Ok(()),
439+
Err(e) => {
440+
error!("Unable to handle Broker action: {:?}", e);
441+
Ok(())
442+
}
436443
}
437444
} else {
438445
Ok(())
@@ -444,7 +451,7 @@ pub async fn handle_instance_change(
444451
/// InstanceAction::Remove => Delete all Jobs labeled with the Instance name
445452
/// InstanceAction::Update => No nothing
446453
pub async fn handle_instance_change_job(
447-
instance: Instance,
454+
instance: &Instance,
448455
config_generation: i64,
449456
job_spec: &JobSpec,
450457
action: &InstanceAction,
@@ -472,7 +479,7 @@ pub async fn handle_instance_change_job(
472479
trace!("handle_instance_change_job - instance added");
473480
let capability_id = format!("{}/{}", AKRI_PREFIX, instance_name);
474481
let new_job = job::create_new_job_from_spec(
475-
&instance,
482+
instance,
476483
OwnershipInfo::new(
477484
OwnershipType::Instance,
478485
instance_name.to_string(),
@@ -841,13 +848,15 @@ mod handle_instance_tests {
841848
new_pod_names: Vec<&'static str>,
842849
new_pod_instance_names: Vec<&'static str>,
843850
new_pod_namespaces: Vec<&'static str>,
851+
new_pod_error: Vec<bool>,
844852
}
845853

846854
fn configure_add_shared_config_a_359973(pod_name: &'static str) -> HandleAdditionWork {
847855
HandleAdditionWork {
848856
new_pod_names: vec![pod_name],
849857
new_pod_instance_names: vec!["config-a-359973"],
850858
new_pod_namespaces: vec!["config-a-namespace"],
859+
new_pod_error: vec![false],
851860
}
852861
}
853862
fn get_config_work() -> HandleConfigWork {
@@ -857,11 +866,12 @@ mod handle_instance_tests {
857866
find_config_result: "../test/json/config-a.json",
858867
}
859868
}
860-
fn configure_add_local_config_a_b494b6() -> HandleAdditionWork {
869+
fn configure_add_local_config_a_b494b6(error: bool) -> HandleAdditionWork {
861870
HandleAdditionWork {
862871
new_pod_names: vec!["config-a-b494b6-pod"],
863872
new_pod_instance_names: vec!["config-a-b494b6"],
864873
new_pod_namespaces: vec!["config-a-namespace"],
874+
new_pod_error: vec![error],
865875
}
866876
}
867877

@@ -873,6 +883,7 @@ mod handle_instance_tests {
873883
work.new_pod_namespaces[i],
874884
AKRI_INSTANCE_LABEL_NAME,
875885
work.new_pod_instance_names[i],
886+
work.new_pod_error[i],
876887
);
877888
}
878889
}
@@ -944,7 +955,33 @@ mod handle_instance_tests {
944955
find_pods_delete_start_time: false,
945956
config_work: get_config_work(),
946957
deletion_work: None,
947-
addition_work: Some(configure_add_local_config_a_b494b6()),
958+
addition_work: Some(configure_add_local_config_a_b494b6(false)),
959+
},
960+
);
961+
run_handle_instance_change_test(
962+
&mut mock,
963+
"../test/json/local-instance.json",
964+
&InstanceAction::Add,
965+
)
966+
.await;
967+
}
968+
969+
#[tokio::test]
970+
async fn test_handle_instance_change_for_add_new_local_instance_error() {
971+
let _ = env_logger::builder().is_test(true).try_init();
972+
973+
let mut mock = MockKubeInterface::new();
974+
configure_for_handle_instance_change(
975+
&mut mock,
976+
&HandleInstanceWork {
977+
find_pods_selector: "akri.sh/instance=config-a-b494b6",
978+
find_pods_result: "../test/json/empty-list.json",
979+
find_pods_phase: None,
980+
find_pods_start_time: None,
981+
find_pods_delete_start_time: false,
982+
config_work: get_config_work(),
983+
deletion_work: None,
984+
addition_work: Some(configure_add_local_config_a_b494b6(true)),
948985
},
949986
);
950987
run_handle_instance_change_test(
@@ -1139,7 +1176,7 @@ mod handle_instance_tests {
11391176
find_pods_delete_start_time: false,
11401177
config_work: get_config_work(),
11411178
deletion_work: None,
1142-
addition_work: Some(configure_add_local_config_a_b494b6()),
1179+
addition_work: Some(configure_add_local_config_a_b494b6(false)),
11431180
},
11441181
);
11451182
run_handle_instance_change_test(

controller/src/util/shared_test_utils.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ pub mod config_for_tests {
253253
pod_namespace: &'static str,
254254
label_id: &'static str,
255255
label_value: &'static str,
256+
error: bool,
256257
) {
257258
trace!("mock.expect_create_pod pod_name:{}", pod_name);
258259
mock.expect_create_pod()
@@ -269,7 +270,10 @@ pub mod config_for_tests {
269270
== label_value
270271
&& namespace == pod_namespace
271272
})
272-
.returning(move |_, _| Ok(()));
273+
.returning(move |_, _| match error {
274+
false => Ok(()),
275+
true => Err(anyhow::format_err!("create pod error")),
276+
});
273277
}
274278

275279
pub fn configure_remove_pod(

0 commit comments

Comments
 (0)