From 44e03ff31be47f2d01cbdc3362ab5978dc75f883 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Mon, 27 Jan 2020 16:26:21 -0500 Subject: [PATCH] [CI] Retry flaky tests (#53961) (#56057) --- Jenkinsfile | 29 ++++++++++++--- vars/githubPr.groovy | 58 +++++++++++++++++++++++++++-- vars/kibanaPipeline.groovy | 8 +++- vars/retryable.groovy | 75 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 10 deletions(-) create mode 100644 vars/retryable.groovy diff --git a/Jenkinsfile b/Jenkinsfile index e8e4893b346c7c..86890042f1c196 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -4,16 +4,21 @@ library 'kibana-pipeline-library' kibanaLibrary.load() stage("Kibana Pipeline") { // This stage is just here to help the BlueOcean UI a little bit - timeout(time: 120, unit: 'MINUTES') { + timeout(time: 135, unit: 'MINUTES') { timestamps { ansiColor('xterm') { githubPr.withDefaultPrComments { catchError { + retryable.enable() parallel([ 'kibana-intake-agent': kibanaPipeline.legacyJobRunner('kibana-intake'), 'x-pack-intake-agent': kibanaPipeline.legacyJobRunner('x-pack-intake'), 'kibana-oss-agent': kibanaPipeline.withWorkers('kibana-oss-tests', { kibanaPipeline.buildOss() }, [ - 'oss-firefoxSmoke': kibanaPipeline.getPostBuildWorker('firefoxSmoke', { runbld('./test/scripts/jenkins_firefox_smoke.sh', 'Execute kibana-firefoxSmoke') }), + 'oss-firefoxSmoke': kibanaPipeline.getPostBuildWorker('firefoxSmoke', { + retryable('kibana-firefoxSmoke') { + runbld('./test/scripts/jenkins_firefox_smoke.sh', 'Execute kibana-firefoxSmoke') + } + }), 'oss-ciGroup1': kibanaPipeline.getOssCiGroupWorker(1), 'oss-ciGroup2': kibanaPipeline.getOssCiGroupWorker(2), 'oss-ciGroup3': kibanaPipeline.getOssCiGroupWorker(3), @@ -26,11 +31,19 @@ stage("Kibana Pipeline") { // This stage is just here to help the BlueOcean UI a 'oss-ciGroup10': kibanaPipeline.getOssCiGroupWorker(10), 'oss-ciGroup11': kibanaPipeline.getOssCiGroupWorker(11), 'oss-ciGroup12': kibanaPipeline.getOssCiGroupWorker(12), - 
'oss-accessibility': kibanaPipeline.getPostBuildWorker('accessibility', { runbld('./test/scripts/jenkins_accessibility.sh', 'Execute accessibility tests') }), + 'oss-accessibility': kibanaPipeline.getPostBuildWorker('accessibility', { + retryable('kibana-accessibility') { + runbld('./test/scripts/jenkins_accessibility.sh', 'Execute accessibility tests') + } + }), // 'oss-visualRegression': kibanaPipeline.getPostBuildWorker('visualRegression', { runbld('./test/scripts/jenkins_visual_regression.sh', 'Execute kibana-visualRegression') }), ]), 'kibana-xpack-agent': kibanaPipeline.withWorkers('kibana-xpack-tests', { kibanaPipeline.buildXpack() }, [ - 'xpack-firefoxSmoke': kibanaPipeline.getPostBuildWorker('xpack-firefoxSmoke', { runbld('./test/scripts/jenkins_xpack_firefox_smoke.sh', 'Execute xpack-firefoxSmoke') }), + 'xpack-firefoxSmoke': kibanaPipeline.getPostBuildWorker('xpack-firefoxSmoke', { + retryable('xpack-firefoxSmoke') { + runbld('./test/scripts/jenkins_xpack_firefox_smoke.sh', 'Execute xpack-firefoxSmoke') + } + }), 'xpack-ciGroup1': kibanaPipeline.getXpackCiGroupWorker(1), 'xpack-ciGroup2': kibanaPipeline.getXpackCiGroupWorker(2), 'xpack-ciGroup3': kibanaPipeline.getXpackCiGroupWorker(3), @@ -41,12 +54,18 @@ stage("Kibana Pipeline") { // This stage is just here to help the BlueOcean UI a 'xpack-ciGroup8': kibanaPipeline.getXpackCiGroupWorker(8), 'xpack-ciGroup9': kibanaPipeline.getXpackCiGroupWorker(9), 'xpack-ciGroup10': kibanaPipeline.getXpackCiGroupWorker(10), - 'xpack-accessibility': kibanaPipeline.getPostBuildWorker('xpack-accessibility', { runbld('./test/scripts/jenkins_xpack_accessibility.sh', 'Execute xpack-accessibility tests') }), + 'xpack-accessibility': kibanaPipeline.getPostBuildWorker('xpack-accessibility', { + retryable('xpack-accessibility') { + runbld('./test/scripts/jenkins_xpack_accessibility.sh', 'Execute xpack-accessibility tests') + } + }), // 'xpack-visualRegression': kibanaPipeline.getPostBuildWorker('xpack-visualRegression', { 
runbld('./test/scripts/jenkins_xpack_visual_regression.sh', 'Execute xpack-visualRegression') }), ]), ]) } } + + retryable.printFlakyFailures() kibanaPipeline.sendMail() } } diff --git a/vars/githubPr.groovy b/vars/githubPr.groovy index ce164ab98ab1ef..4c19511bb89532 100644 --- a/vars/githubPr.groovy +++ b/vars/githubPr.groovy @@ -88,6 +88,8 @@ def getHistoryText(builds) { .collect { build -> if (build.status == "SUCCESS") { return "* :green_heart: [Build #${build.number}](${build.url}) succeeded ${build.commit}" + } else if(build.status == "UNSTABLE") { + return "* :yellow_heart: [Build #${build.number}](${build.url}) was flaky ${build.commit}" } else { return "* :broken_heart: [Build #${build.number}](${build.url}) failed ${build.commit}" } @@ -97,18 +99,66 @@ def getHistoryText(builds) { return "### History\n${list}" } +def getTestFailuresMessage() { + def failures = testUtils.getFailures() + if (!failures) { + return "" + } + + def messages = [] + + failures.take(3).each { failure -> + messages << """ +--- + +### [Test Failures](${env.BUILD_URL}testReport) +
${failure.fullDisplayName} + +[Link to Jenkins](${failure.url}) + +``` +${failure.stdOut} +``` +
+ +--- + """ + } + + if (failures.size() > 3) { + messages << "and ${failures.size() - 3} more failures, only showing the first 3." + } + + return messages.join("\n") +} + def getNextCommentMessage(previousCommentInfo = [:]) { - info = previousCommentInfo ?: [:] + def info = previousCommentInfo ?: [:] info.builds = previousCommentInfo.builds ?: [] def messages = [] + def status = buildUtils.getBuildStatus() - if (buildUtils.getBuildStatus() == 'SUCCESS') { + if (status == 'SUCCESS') { messages << """ ## :green_heart: Build Succeeded * [continuous-integration/kibana-ci/pull-request](${env.BUILD_URL}) * Commit: ${getCommitHash()} """ + } else if(status == 'UNSTABLE') { + def message = """ + ## :yellow_heart: Build succeeded, but was flaky + * [continuous-integration/kibana-ci/pull-request](${env.BUILD_URL}) + * Commit: ${getCommitHash()} + """.stripIndent() + + def failures = retryable.getFlakyFailures() + if (failures && failures.size() > 0) { + def list = failures.collect { " * ${it.label}" }.join("\n") + message += "* Flaky suites:\n${list}" + } + + messages << message } else { messages << """ ## :broken_heart: Build Failed @@ -117,6 +167,8 @@ def getNextCommentMessage(previousCommentInfo = [:]) { """ } + messages << getTestFailuresMessage() + if (info.builds && info.builds.size() > 0) { messages << getHistoryText(info.builds) } @@ -133,7 +185,7 @@ def getNextCommentMessage(previousCommentInfo = [:]) { return messages .findAll { !!it } // No blank strings - .collect { it.stripIndent().trim() } + .collect { it.stripIndent().trim() } // This just allows us to indent various strings above, but leaves them un-indented in the comment .join("\n\n") } diff --git a/vars/kibanaPipeline.groovy b/vars/kibanaPipeline.groovy index c778dd799f6e59..3950071b81418c 100644 --- a/vars/kibanaPipeline.groovy +++ b/vars/kibanaPipeline.groovy @@ -70,7 +70,9 @@ def getOssCiGroupWorker(ciGroup) { "CI_GROUP=${ciGroup}", "JOB=kibana-ciGroup${ciGroup}", ]) { - 
runbld("./test/scripts/jenkins_ci_group.sh", "Execute kibana-ciGroup${ciGroup}") + retryable("kibana-ciGroup${ciGroup}") { + runbld("./test/scripts/jenkins_ci_group.sh", "Execute kibana-ciGroup${ciGroup}") + } } }) } @@ -81,7 +83,9 @@ def getXpackCiGroupWorker(ciGroup) { "CI_GROUP=${ciGroup}", "JOB=xpack-kibana-ciGroup${ciGroup}", ]) { - runbld("./test/scripts/jenkins_xpack_ci_group.sh", "Execute xpack-kibana-ciGroup${ciGroup}") + retryable("xpack-kibana-ciGroup${ciGroup}") { + runbld("./test/scripts/jenkins_xpack_ci_group.sh", "Execute xpack-kibana-ciGroup${ciGroup}") + } } }) } diff --git a/vars/retryable.groovy b/vars/retryable.groovy new file mode 100644 index 00000000000000..cc34024958aed4 --- /dev/null +++ b/vars/retryable.groovy @@ -0,0 +1,75 @@ +import groovy.transform.Field + +public static @Field GLOBAL_RETRIES_ENABLED = false +public static @Field MAX_GLOBAL_RETRIES = 1 +public static @Field CURRENT_GLOBAL_RETRIES = 0 +public static @Field FLAKY_FAILURES = [] + +def setMax(max) { + retryable.MAX_GLOBAL_RETRIES = max +} + +def enable() { + retryable.GLOBAL_RETRIES_ENABLED = true +} + +def enable(max) { + enable() + setMax(max) +} + +def haveReachedMaxRetries() { + return retryable.CURRENT_GLOBAL_RETRIES >= retryable.MAX_GLOBAL_RETRIES +} + +def getFlakyFailures() { + return retryable.FLAKY_FAILURES +} + +def printFlakyFailures() { + catchError { + def failures = getFlakyFailures() + + if (failures && failures.size() > 0) { + print "This build had the following flaky failures:" + failures.each { + print "\n${it.label}" + buildUtils.printStacktrace(it.exception) + } + } + } +} + +def call(label, Closure closure) { + if (!retryable.GLOBAL_RETRIES_ENABLED) { + closure() + return + } + + try { + closure() + } catch (ex) { + if (haveReachedMaxRetries()) { + print "Couldn't retry '${label}', have already reached the max number of retries for this build." 
+ throw ex + } + + retryable.CURRENT_GLOBAL_RETRIES++ + buildUtils.printStacktrace(ex) + unstable "${label} failed but is retryable, trying a second time..." + + def JOB = env.JOB ? "${env.JOB}-retry" : "" + withEnv([ + "JOB=${JOB}", + ]) { + closure() + } + + retryable.FLAKY_FAILURES << [ + label: label, + exception: ex, + ] + + unstable "${label} failed on the first attempt, but succeeded on the second. Marking it as flaky." + } +}