Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testing - Output Argo workflow information when the workflow times out #2176

Merged
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 43 additions & 24 deletions test/check-argo-status.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,45 +14,64 @@
# See the License for the specific language governing permissions and
# limitations under the License.

set -ex
set -e
Ark-kun marked this conversation as resolved.
Show resolved Hide resolved

# Local directory that copied test results are placed in.
ARTIFACT_DIR="${WORKSPACE}/_artifacts"
# Substrings searched for in the `kubectl get workflow --show-labels` output.
WORKFLOW_COMPLETE_KEYWORD="completed=true"
WORKFLOW_FAILED_KEYWORD="phase=Failed"
# The workflow status is checked once every 20 seconds, so the attempt budget
# is the timeout divided by 20. Arithmetic expansion is used instead of `expr`
# because `expr` exits non-zero when the result is 0, which would abort the
# script under `set -e`. TIMEOUT_SECONDS falls back to 1800 (30 minutes) when
# it is unset or empty.
PULL_ARGO_WORKFLOW_STATUS_MAX_ATTEMPT=$(( ${TIMEOUT_SECONDS:-1800} / 20 ))

# Outcome flags; the polling loop flips them once the workflow is labelled
# as completed (and, if applicable, as failed).
workflow_completed=false
workflow_failed=false

echo "check status of argo workflow $ARGO_WORKFLOW...."
# Poll the Argo workflow status every 20 seconds until it completes; give up after TIMEOUT_SECONDS seconds.
for i in $(seq 1 ${PULL_ARGO_WORKFLOW_STATUS_MAX_ATTEMPT})
do
WORKFLOW_STATUS=`kubectl get workflow $ARGO_WORKFLOW -n ${NAMESPACE} --show-labels 2>&1` \
|| echo kubectl get workflow failed with "$WORKFLOW_STATUS" # Tolerate temporary network failure during kubectl get workflow
echo $WORKFLOW_STATUS | grep ${WORKFLOW_COMPLETE_KEYWORD} && s=0 && break || s=$? && printf "Workflow ${ARGO_WORKFLOW} is not finished.\n${WORKFLOW_STATUS}\nSleep for 20 seconds...\n" && sleep 20
# Inspect the output captured in WORKFLOW_STATUS. Once it carries the
# "completed" keyword, record the outcome (including whether the "failed"
# keyword is also present) and stop polling; otherwise report progress and
# wait 20 seconds before the next attempt.
# A quoted [[ ]] substring match is used so the kubectl output is never
# word-split or glob-expanded, and no extra processes are spawned per check.
if [[ "${WORKFLOW_STATUS}" == *"${WORKFLOW_COMPLETE_KEYWORD}"* ]]; then
  workflow_completed=true
  if [[ "${WORKFLOW_STATUS}" == *"${WORKFLOW_FAILED_KEYWORD}"* ]]; then
    workflow_failed=true
  fi
  break
else
  echo "Workflow ${ARGO_WORKFLOW} is not finished: ${WORKFLOW_STATUS} - Sleep for 20 seconds..."
  sleep 20
fi
done

# Check whether the argo workflow finished or not and exit if not.
if [[ $s != 0 ]]; then
echo "Prow job Failed: Argo workflow timeout.."
argo logs -w ${ARGO_WORKFLOW} -n ${NAMESPACE}
exit $s
if [[ "$workflow_completed" == "true" ]] && [[ "$workflow_failed" == "false" ]]; then
echo "Argo workflow finished successfully."
if [[ -n "$TEST_RESULT_FOLDER" ]]; then
echo "Copy test result"
mkdir -p "$ARTIFACT_DIR"
gsutil cp -r "${TEST_RESULTS_GCS_DIR}"/* "${ARTIFACT_DIR}" || true
fi
argo get "${ARGO_WORKFLOW}" -n "${NAMESPACE}"
exit 0
Ark-kun marked this conversation as resolved.
Show resolved Hide resolved
fi

echo "Argo workflow finished."

if [[ ! -z "$TEST_RESULT_FOLDER" ]]
then
echo "Copy test result"
mkdir -p $ARTIFACT_DIR
gsutil cp -r "${TEST_RESULTS_GCS_DIR}"/* "${ARTIFACT_DIR}" || true
fi

if [[ $WORKFLOW_STATUS = *"${WORKFLOW_FAILED_KEYWORD}"* ]]; then
echo "Test workflow failed."
echo "=========Argo Workflow Logs========="
argo logs -w ${ARGO_WORKFLOW} -n ${NAMESPACE}
echo "===================================="
argo get ${ARGO_WORKFLOW} -n ${NAMESPACE}
exit 1
# Handle a workflow that failed or timed out.
if [[ "$workflow_completed" == "false" ]]; then
echo "Argo workflow timed out."
else
argo get ${ARGO_WORKFLOW} -n ${NAMESPACE}
echo "Argo workflow failed."
fi

echo "=========Argo Workflow Logs========="
argo logs -w "${ARGO_WORKFLOW}" -n "${NAMESPACE}"

echo "========All workflows============="

# Print the details of every workflow listed in the namespace, one section
# per workflow. `IFS= read -r` keeps each listed name exactly as printed
# (no backslash interpretation, no whitespace trimming).
argo --namespace "${NAMESPACE}" list --output=name |
  while IFS= read -r workflow_id; do
    echo "========${workflow_id}============="
    argo get "${workflow_id}" -n "${NAMESPACE}"
  done

echo "=========Main workflow=============="
argo get "${ARGO_WORKFLOW}" -n "${NAMESPACE}"

exit 1