forked from ofiwg/libfabric
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Seth Zegelstein <szegel@amazon.com>
- Loading branch information
1 parent
9a049b8
commit 7175ff6
Showing
2 changed files
with
265 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only */ | ||
/* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. */ | ||
// Abort superseded builds (e.g. after a force push): every build first passes
// the milestone of the build before it and then its own milestone.
int thisBuild = env.BUILD_NUMBER as int
if (thisBuild > 1) {
    milestone(thisBuild - 1)
}
milestone(thisBuild)
|
||
pipeline {
    agent {
        ecs {
            inheritFrom 'fargate-large'
        }
    }
    options {
        buildDiscarder(logRotator(daysToKeepStr: "90"))
        timeout(time: 8, unit: 'HOURS')
    }
    environment {
        // AWS region where the cluster is created
        // NOTE(review): the test stages below pass us-east-1 / eu-north-1 to the
        // orchestrator, while the `aborted` cleanup passes this region to
        // delete_manual_cluster.py -- confirm that is what the script expects.
        REGION="us-west-2"
    }
    stages {
        // Cleanup workspace before job start.
        stage("Clean up workspace") {
            steps {
                deleteDir()
            }
        }
        stage("Checkout SCM repo") {
            steps {
                checkout scm
            }
        }
        stage("Download and extract PortaFiducia") {
            steps {
                script {
                    sh 'printenv'
                    // Load the shared helpers exactly once and keep the instance in
                    // the script binding (no `def`) so that every later stage uses
                    // the same object. Each `load` call evaluates the file again and
                    // returns a new object with its own `build_ok` flag, so loading
                    // per stage would hide test failures from "check build_ok".
                    common = load "contrib/aws/common.groovy"
                    common.download_and_extract_portafiducia('PortaFiducia')
                }
            }
        }
        stage("Install PortaFiducia") {
            steps {
                script {
                    common.install_porta_fiducia()
                }
            }
        }
        stage("Test EFA provider") {
            steps {
                script {
                    def stages = [:]
                    // This needs the extra space at the end
                    def addl_args_pr = "--test-libfabric-pr $env.CHANGE_ID "

                    // Single Node Tests - EFA
                    stages["1_g4dn_alinux2-efa"] = common.get_test_stage("1_g4dn_alinux2_efa", env.BUILD_TAG, "alinux2", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)
                    stages["1_g4dn_ubuntu2004-efa"] = common.get_test_stage("1_g4dn_ubuntu2004_efa", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)
                    stages["1_g4dn_rhel8-efa"] = common.get_test_stage("1_g4dn_rhel8_efa", env.BUILD_TAG, "rhel8", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)
                    stages["1_g4dn_centos7-efa"] = common.get_test_stage("1_g4dn_centos7_efa", env.BUILD_TAG, "centos7", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)

                    // Single Node Tests - SHM
                    stages["1_g4dn_alinux2_shm"] = common.get_test_stage("1_g4dn_alinux2_shm", env.BUILD_TAG, "alinux2", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
                    stages["1_g4dn_ubuntu2004_shm"] = common.get_test_stage("1_g4dn_ubuntu2004_shm", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
                    stages["1_g4dn_rhel8_shm"] = common.get_test_stage("1_g4dn_rhel8_shm", env.BUILD_TAG, "rhel8", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
                    stages["1_g4dn_centos7_shm"] = common.get_test_stage("1_g4dn_centos7_shm", env.BUILD_TAG, "centos7", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
                    stages["1_g4dn_ubuntu2004_shm_disable-cma"] = common.get_test_stage("1_g4dn_ubuntu2004_shm_disable-cma", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm --enable-cma false")

                    // TODO: Get Single Node Windows test working
                    // stages["EFA_Windows_Test"] = common.get_single_node_windows_test_stage("EFA_Windows_Test")

                    // Multi Node Tests - EFA
                    stages["2_hpc6a_alinux2_efa"] = common.get_test_stage("2_hpc6a_alinux2_efa", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr)
                    stages["2_hpc6a_ubuntu2004_efa"] = common.get_test_stage("2_hpc6a_ubuntu2004_efa", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr)
                    stages["2_hpc6a_rhel8_efa"] = common.get_test_stage("2_hpc6a_rhel8_efa", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr)

                    // Multi Node Tests - TCP
                    stages["2_hpc6a_alinux2_tcp"] = common.get_test_stage("2_hpc6a_alinux2_tcp", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp")
                    stages["2_hpc6a_ubuntu2004_tcp"] = common.get_test_stage("2_hpc6a_ubuntu2004_tcp", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp")
                    stages["2_hpc6a_rhel8_tcp"] = common.get_test_stage("2_hpc6a_rhel8_tcp", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp")

                    // Multi Node Tests - SOCKETS
                    stages["2_hpc6a_alinux2_sockets"] = common.get_test_stage("2_hpc6a_alinux2_sockets", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets")
                    stages["2_hpc6a_ubuntu2004_sockets"] = common.get_test_stage("2_hpc6a_ubuntu2004_sockets", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets")
                    stages["2_hpc6a_rhel8_sockets"] = common.get_test_stage("2_hpc6a_rhel8_sockets", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets")

                    parallel stages
                }
            }
        }
        stage('check build_ok') {
            steps {
                script {
                    // Read the flag from the same helper instance the test stages
                    // ran on; the test stages keep the build result at SUCCESS and
                    // record failures only in this flag.
                    if (common.build_ok) {
                        currentBuild.result = "SUCCESS"
                    }
                    else {
                        currentBuild.result = "FAILURE"
                    }
                }
            }
        }
    }
    post {
        always {
            sh 'find PortaFiducia/tests/outputs -name "*.xml" | xargs du -shc'
            junit testResults: 'PortaFiducia/tests/outputs/**/*.xml', keepLongStdio: false
            archiveArtifacts artifacts: 'PortaFiducia/tests/outputs/**/*.*'
        }
        failure {
            sh '''
                echo FAILURE
            '''
        }
        aborted {
            sh '. venv/bin/activate; ./PortaFiducia/scripts/delete_manual_cluster.py --cluster-name "$BUILD_TAG"\'*\' --region $REGION'
        }
        // Cleanup workspace after job completes.
        cleanup {
            deleteDir()
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only */ | ||
/* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. */ | ||
/* This file contains variables and functions that can be shared across different jobs */ | ||
import groovy.transform.Field | ||
/*
 * Set to false by the test helpers in this file when a test script exits with
 * a failure status. The flag belongs to the script object returned by one
 * `load` of this file; loading the file again yields a new object whose flag
 * starts at true, so callers must reuse a single loaded instance to see updates.
 */
@Field boolean build_ok = true
|
||
def get_portafiducia_download_path() {
    /*
     * Return the S3 URI of the stable PortaFiducia tarball.
     * The bucket name embeds the AWS account id of the current credentials,
     * obtained from `aws sts get-caller-identity`; `tr -dc 0-9` keeps only the
     * digits, so the captured output carries no trailing newline.
     */
    def accountId = sh(
        returnStdout: true,
        script: "aws sts get-caller-identity --query Account --output text | tr -dc 0-9"
    )
    def bucket = "libfabric-ci-${accountId}-us-west-2"
    return "s3://${bucket}/portafiducia/portafiducia.tar.gz"
}
|
||
def download_and_extract_portafiducia(outputDir) {
    /*
     * Fetch the PortaFiducia tarball from S3 into /tmp and unpack it.
     * param@ outputDir: directory to create (if needed) and extract into
     * A non-zero exit status from the shell pipeline marks the build unstable
     * instead of aborting it.
     */
    def tarball = "/tmp/portafiducia.tar.gz"
    def s3Uri = this.get_portafiducia_download_path()

    def fetchAndUnpack = "mkdir -p ${outputDir} && aws s3 cp ${s3Uri} ${tarball} && " +
                         "tar xf ${tarball} -C ${outputDir}"
    def status = sh(returnStatus: true, script: fetchAndUnpack)

    if (status == 0) {
        return
    }
    unstable('Failed to download and extract PortaFiducia')
}
|
||
def install_porta_fiducia() {
    /*
     * Create a virtual environment in ./venv, upgrade pip and awscli inside
     * it, and install the PortaFiducia directory in editable mode (pip -e).
     */
    def installScript = '''
        python3 -m venv venv
        . venv/bin/activate
        pip install --upgrade pip
        pip install --upgrade awscli
        pip install -e PortaFiducia
    '''
    sh installScript
}
|
||
def run_test_orchestrator_once(run_name, build_tag, os, instance_type, instance_count, region, test_config_file, addl_args) {
    /*
     * Run PortaFiducia/tests/test_orchestrator.py once inside the venv and
     * translate its exit status into Jenkins results.
     * param@ run_name: label of the run (not used by this function)
     * param@ build_tag: the BUILD_TAG env generated by Jenkins; used to derive the cluster name
     * param@ os: value passed as --os
     * param@ instance_type: value passed as --instance-type
     * param@ instance_count: value passed as --instance-count
     * param@ region: value passed as --region
     * param@ test_config_file: file name appended to configs/ and passed as --config
     * param@ addl_args: additional arguments inserted verbatim into the command line
     *
     * Exit status handling:
     *   0     - nothing is recorded
     *   65    - the build/stage is marked UNSTABLE
     *   other - build_ok is cleared and only the stage is marked FAILURE; the
     *           build result is left alone so sibling parallel stages keep running
     */
    def cluster_name = get_cluster_name(build_tag, os, instance_type)
    def args = "--config configs/${test_config_file} --os ${os} --instance-type ${instance_type} --instance-count ${instance_count} --region ${region} --cluster-name ${cluster_name} ${addl_args} --junit-xml outputs/${cluster_name}.xml"
    def ret = sh (
        script: ". venv/bin/activate; cd PortaFiducia/tests && ./test_orchestrator.py ${args}",
        returnStatus: true
    )
    // Braces are required here: without them the catchError block ran for every
    // status, so a status of 65 was marked UNSTABLE and then immediately
    // overridden by a FAILURE stage result.
    if (ret == 65) {
        unstable('Scripts exited with status 65')
    } else if (ret != 0) {
        build_ok = false
        catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
            sh "exit ${ret}"
        }
    }
}
|
||
def get_random_string(len) {
    /*
     * Return `len` characters drawn from /dev/urandom, keeping only A-Z, a-z
     * and 0-9.
     */
    return sh(
        returnStdout: true,
        script: "cat /dev/urandom | LC_ALL=C tr -dc A-Za-z0-9 | head -c ${len}"
    )
}
|
||
def get_cluster_name(build_tag, os, instance_type) {
    /*
     * Build a cluster name of the form <tag>-<os>-<instance type>-<random>.
     * Pcluster requires a cluster name under 60 characters, so the parts are
     * capped at 28, 10, 10 and 8 characters (59 with the three separators).
     * The name must not contain "."; the string clean-up is done with shell
     * tools (sed / tr) because Jenkins does not allow groovy to use the
     * replace() method of string.
     */
    // Drop the leading "jenkins-" and any spaces from the build tag.
    def strippedTag = sh(
        returnStdout: true,
        script: "echo ${build_tag} | sed \"s/^jenkins-//g\" | sed \"s/ //g\""
    )

    def suffix = get_random_string(8)

    // tr removes every "." and newline, including the newline that the
    // captured sed output above may still carry.
    def composeCmd = "echo '${strippedTag.take(28)}-${os.take(10)}-${instance_type.take(10)}-'${suffix} | tr -d '.\\n'"
    return sh(script: composeCmd, returnStdout: true)
}
|
||
def get_single_node_windows_test_stage(stage_name) {
    /*
     * Get Windows Stage
     * param@ stage_name: the name of the stage
     * return@: a closure that, when run, opens a stage named stage_name and
     *          executes PortaFiducia/scripts/test_orchestrator_windows.py in it.
     *
     * Exit status handling inside the stage:
     *   0     - nothing is recorded
     *   65    - the build/stage is marked UNSTABLE
     *   other - build_ok is cleared and only the stage is marked FAILURE
     */
    return {
        stage("${stage_name}") {
            def ret = sh (
                script: ". venv/bin/activate; cd PortaFiducia/scripts; env AWS_DEFAULT_REGION=us-west-2 ./test_orchestrator_windows.py public",
                returnStatus: true
            )
            // Braces are required here: without them the catchError block ran
            // for every status, so a status of 65 was marked UNSTABLE and then
            // immediately overridden by a FAILURE stage result.
            if (ret == 65) {
                unstable('Scripts exited with status 65')
            } else if (ret != 0) {
                build_ok = false
                catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
                    sh "exit ${ret}"
                }
            }
        }
    }
}
|
||
def get_test_stage(stage_name, build_tag, os, instance_type, instance_count, region, test_config, addl_args) {
    /*
     * Build a deferred test stage: nothing runs until the returned closure is
     * invoked (for example as one branch of `parallel`).
     * param@ stage_name: the name of the stage
     * param@ build_tag: the BUILD_TAG env generated by Jenkins
     * param@ os: the operating system for the test stage.
     * param@ instance_type: the instance type for the test stage.
     * param@ instance_count: number of instances to use
     * param@ region: the (default) aws region where the tests are run.
     * param@ test_config: the name of test config file in PortaFiducia/tests/configs/
     * param@ addl_args: additional arguments passed to test_orchestrator.py
     * return@: a closure that opens the stage and runs test_orchestrator.py once.
     */
    def testStage = {
        stage("${stage_name}") {
            this.run_test_orchestrator_once(
                stage_name, build_tag, os, instance_type,
                instance_count, region, test_config, addl_args
            )
        }
    }
    return testStage
}
|
||
|
||
|
||
// Return the script object itself so that the caller of `load` receives an
// object on which the functions and the build_ok field of this file are reachable.
return this