Skip to content

Commit

Permalink
[CI] Retry flaky tests (#53961) (#56057)
Browse files Browse the repository at this point in the history
  • Loading branch information
brianseeders committed Jan 27, 2020
1 parent 828e768 commit 44e03ff
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 10 deletions.
29 changes: 24 additions & 5 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,21 @@ library 'kibana-pipeline-library'
kibanaLibrary.load()

stage("Kibana Pipeline") { // This stage is just here to help the BlueOcean UI a little bit
timeout(time: 120, unit: 'MINUTES') {
timeout(time: 135, unit: 'MINUTES') {
timestamps {
ansiColor('xterm') {
githubPr.withDefaultPrComments {
catchError {
retryable.enable()
parallel([
'kibana-intake-agent': kibanaPipeline.legacyJobRunner('kibana-intake'),
'x-pack-intake-agent': kibanaPipeline.legacyJobRunner('x-pack-intake'),
'kibana-oss-agent': kibanaPipeline.withWorkers('kibana-oss-tests', { kibanaPipeline.buildOss() }, [
'oss-firefoxSmoke': kibanaPipeline.getPostBuildWorker('firefoxSmoke', { runbld('./test/scripts/jenkins_firefox_smoke.sh', 'Execute kibana-firefoxSmoke') }),
'oss-firefoxSmoke': kibanaPipeline.getPostBuildWorker('firefoxSmoke', {
retryable('kibana-firefoxSmoke') {
runbld('./test/scripts/jenkins_firefox_smoke.sh', 'Execute kibana-firefoxSmoke')
}
}),
'oss-ciGroup1': kibanaPipeline.getOssCiGroupWorker(1),
'oss-ciGroup2': kibanaPipeline.getOssCiGroupWorker(2),
'oss-ciGroup3': kibanaPipeline.getOssCiGroupWorker(3),
Expand All @@ -26,11 +31,19 @@ stage("Kibana Pipeline") { // This stage is just here to help the BlueOcean UI a
'oss-ciGroup10': kibanaPipeline.getOssCiGroupWorker(10),
'oss-ciGroup11': kibanaPipeline.getOssCiGroupWorker(11),
'oss-ciGroup12': kibanaPipeline.getOssCiGroupWorker(12),
'oss-accessibility': kibanaPipeline.getPostBuildWorker('accessibility', { runbld('./test/scripts/jenkins_accessibility.sh', 'Execute accessibility tests') }),
'oss-accessibility': kibanaPipeline.getPostBuildWorker('accessibility', {
retryable('kibana-accessibility') {
runbld('./test/scripts/jenkins_accessibility.sh', 'Execute accessibility tests')
}
}),
// 'oss-visualRegression': kibanaPipeline.getPostBuildWorker('visualRegression', { runbld('./test/scripts/jenkins_visual_regression.sh', 'Execute kibana-visualRegression') }),
]),
'kibana-xpack-agent': kibanaPipeline.withWorkers('kibana-xpack-tests', { kibanaPipeline.buildXpack() }, [
'xpack-firefoxSmoke': kibanaPipeline.getPostBuildWorker('xpack-firefoxSmoke', { runbld('./test/scripts/jenkins_xpack_firefox_smoke.sh', 'Execute xpack-firefoxSmoke') }),
'xpack-firefoxSmoke': kibanaPipeline.getPostBuildWorker('xpack-firefoxSmoke', {
retryable('xpack-firefoxSmoke') {
runbld('./test/scripts/jenkins_xpack_firefox_smoke.sh', 'Execute xpack-firefoxSmoke')
}
}),
'xpack-ciGroup1': kibanaPipeline.getXpackCiGroupWorker(1),
'xpack-ciGroup2': kibanaPipeline.getXpackCiGroupWorker(2),
'xpack-ciGroup3': kibanaPipeline.getXpackCiGroupWorker(3),
Expand All @@ -41,12 +54,18 @@ stage("Kibana Pipeline") { // This stage is just here to help the BlueOcean UI a
'xpack-ciGroup8': kibanaPipeline.getXpackCiGroupWorker(8),
'xpack-ciGroup9': kibanaPipeline.getXpackCiGroupWorker(9),
'xpack-ciGroup10': kibanaPipeline.getXpackCiGroupWorker(10),
'xpack-accessibility': kibanaPipeline.getPostBuildWorker('xpack-accessibility', { runbld('./test/scripts/jenkins_xpack_accessibility.sh', 'Execute xpack-accessibility tests') }),
'xpack-accessibility': kibanaPipeline.getPostBuildWorker('xpack-accessibility', {
retryable('xpack-accessibility') {
runbld('./test/scripts/jenkins_xpack_accessibility.sh', 'Execute xpack-accessibility tests')
}
}),
// 'xpack-visualRegression': kibanaPipeline.getPostBuildWorker('xpack-visualRegression', { runbld('./test/scripts/jenkins_xpack_visual_regression.sh', 'Execute xpack-visualRegression') }),
]),
])
}
}

retryable.printFlakyFailures()
kibanaPipeline.sendMail()
}
}
Expand Down
58 changes: 55 additions & 3 deletions vars/githubPr.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ def getHistoryText(builds) {
.collect { build ->
if (build.status == "SUCCESS") {
return "* :green_heart: [Build #${build.number}](${build.url}) succeeded ${build.commit}"
} else if(build.status == "UNSTABLE") {
return "* :yellow_heart: [Build #${build.number}](${build.url}) was flaky ${build.commit}"
} else {
return "* :broken_heart: [Build #${build.number}](${build.url}) failed ${build.commit}"
}
Expand All @@ -97,18 +99,66 @@ def getHistoryText(builds) {
return "### History\n${list}"
}

/**
 * Builds the "Test Failures" section of the pull request comment.
 *
 * Reads the failures from testUtils.getFailures() and renders one collapsible
 * entry per failure (display name, Jenkins link and captured stdout). Only the
 * first few failures are rendered; when there are more, a trailing line states
 * how many were left out.
 *
 * @return the rendered section, or an empty string when there are no failures
 */
def getTestFailuresMessage() {
  def failures = testUtils.getFailures()
  if (!failures) {
    return ""
  }

  // Single source of truth for the cap, so the number of rendered entries and
  // the "N more" summary below can never disagree.
  def maxShown = 3
  def messages = []

  failures.take(maxShown).each { failure ->
    messages << """
---
### [Test Failures](${env.BUILD_URL}testReport)
<details><summary>${failure.fullDisplayName}</summary>
[Link to Jenkins](${failure.url})
```
${failure.stdOut}
```
</details>
---
"""
  }

  if (failures.size() > maxShown) {
    messages << "and ${failures.size() - maxShown} more failures, only showing the first ${maxShown}."
  }

  return messages.join("\n")
}

def getNextCommentMessage(previousCommentInfo = [:]) {
info = previousCommentInfo ?: [:]
def info = previousCommentInfo ?: [:]
info.builds = previousCommentInfo.builds ?: []

def messages = []
def status = buildUtils.getBuildStatus()

if (buildUtils.getBuildStatus() == 'SUCCESS') {
if (status == 'SUCCESS') {
messages << """
## :green_heart: Build Succeeded
* [continuous-integration/kibana-ci/pull-request](${env.BUILD_URL})
* Commit: ${getCommitHash()}
"""
} else if(status == 'UNSTABLE') {
def message = """
## :yellow_heart: Build succeeded, but was flaky
* [continuous-integration/kibana-ci/pull-request](${env.BUILD_URL})
* Commit: ${getCommitHash()}
""".stripIndent()

def failures = retryable.getFlakyFailures()
if (failures && failures.size() > 0) {
def list = failures.collect { " * ${it.label}" }.join("\n")
message += "* Flaky suites:\n${list}"
}

messages << message
} else {
messages << """
## :broken_heart: Build Failed
Expand All @@ -117,6 +167,8 @@ def getNextCommentMessage(previousCommentInfo = [:]) {
"""
}

messages << getTestFailuresMessage()

if (info.builds && info.builds.size() > 0) {
messages << getHistoryText(info.builds)
}
Expand All @@ -133,7 +185,7 @@ def getNextCommentMessage(previousCommentInfo = [:]) {

return messages
.findAll { !!it } // No blank strings
.collect { it.stripIndent().trim() }
.collect { it.stripIndent().trim() } // This just allows us to indent various strings above, but leaves them un-indented in the comment
.join("\n\n")
}

Expand Down
8 changes: 6 additions & 2 deletions vars/kibanaPipeline.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ def getOssCiGroupWorker(ciGroup) {
"CI_GROUP=${ciGroup}",
"JOB=kibana-ciGroup${ciGroup}",
]) {
runbld("./test/scripts/jenkins_ci_group.sh", "Execute kibana-ciGroup${ciGroup}")
retryable("kibana-ciGroup${ciGroup}") {
runbld("./test/scripts/jenkins_ci_group.sh", "Execute kibana-ciGroup${ciGroup}")
}
}
})
}
Expand All @@ -81,7 +83,9 @@ def getXpackCiGroupWorker(ciGroup) {
"CI_GROUP=${ciGroup}",
"JOB=xpack-kibana-ciGroup${ciGroup}",
]) {
runbld("./test/scripts/jenkins_xpack_ci_group.sh", "Execute xpack-kibana-ciGroup${ciGroup}")
retryable("xpack-kibana-ciGroup${ciGroup}") {
runbld("./test/scripts/jenkins_xpack_ci_group.sh", "Execute xpack-kibana-ciGroup${ciGroup}")
}
}
})
}
Expand Down
75 changes: 75 additions & 0 deletions vars/retryable.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import groovy.transform.Field

// Retry state read and updated by the functions in this script.

// When false, call() runs the closure exactly once and never retries.
public static @Field GLOBAL_RETRIES_ENABLED = false
// Retry cap; haveReachedMaxRetries() compares CURRENT_GLOBAL_RETRIES against it.
public static @Field MAX_GLOBAL_RETRIES = 1
// Incremented by call() each time a failed closure is about to be retried.
public static @Field CURRENT_GLOBAL_RETRIES = 0
// [label, exception] entries appended by call() after a retry succeeds.
public static @Field FLAKY_FAILURES = []

/**
 * Replaces the retry cap.
 *
 * @param max value stored in MAX_GLOBAL_RETRIES
 */
def setMax(max) {
  retryable.MAX_GLOBAL_RETRIES = max
}

/**
 * Switches retries on by setting GLOBAL_RETRIES_ENABLED to true.
 * The retry cap is left untouched.
 */
def enable() {
  retryable.GLOBAL_RETRIES_ENABLED = true
}

/**
 * Switches retries on and sets the retry cap in one call.
 *
 * @param max value stored in MAX_GLOBAL_RETRIES
 */
def enable(max) {
  // Same two assignments that enable() and setMax(max) perform, in the same order.
  retryable.GLOBAL_RETRIES_ENABLED = true
  retryable.MAX_GLOBAL_RETRIES = max
}

/**
 * Reports whether the retry budget is used up.
 *
 * @return true when CURRENT_GLOBAL_RETRIES is at or above MAX_GLOBAL_RETRIES
 */
def haveReachedMaxRetries() {
  def used = retryable.CURRENT_GLOBAL_RETRIES
  def allowed = retryable.MAX_GLOBAL_RETRIES
  return used >= allowed
}

/**
 * Accessor for the recorded flaky failures.
 *
 * @return the FLAKY_FAILURES list itself (not a copy)
 */
def getFlakyFailures() {
  def recorded = retryable.FLAKY_FAILURES
  return recorded
}

/**
 * Prints the label and stack trace of every recorded flaky failure.
 * Prints nothing when no failures were recorded. The work runs inside
 * catchError.
 */
def printFlakyFailures() {
  catchError {
    def flaky = getFlakyFailures()

    // Nothing recorded: leave without printing the header.
    if (!flaky || flaky.size() <= 0) {
      return
    }

    print "This build had the following flaky failures:"
    flaky.each { entry ->
      print "\n${entry.label}"
      buildUtils.printStacktrace(entry.exception)
    }
  }
}

/**
 * Runs a closure and, if it throws, runs it one more time while retry
 * budget remains.
 *
 * With retries disabled the closure simply runs once. Otherwise a first
 * failure is rethrown if the budget is exhausted; if not, the retry counter
 * is incremented, the build is marked unstable, and the closure is re-run
 * with JOB suffixed by "-retry" (or empty when JOB is unset). A successful
 * second run records the first exception in FLAKY_FAILURES. An exception
 * from the second run propagates to the caller.
 *
 * @param label   name used in log messages and in the flaky-failure record
 * @param closure the work to run
 */
def call(label, Closure closure) {
  if (!retryable.GLOBAL_RETRIES_ENABLED) {
    closure()
    return
  }

  try {
    closure()
  } catch (firstFailure) {
    if (haveReachedMaxRetries()) {
      print "Couldn't retry '${label}', have already reached the max number of retries for this build."
      throw firstFailure
    }

    // Consume one retry from the budget before the second attempt.
    retryable.CURRENT_GLOBAL_RETRIES = retryable.CURRENT_GLOBAL_RETRIES + 1
    buildUtils.printStacktrace(firstFailure)
    unstable "${label} failed but is retryable, trying a second time..."

    // Second attempt runs under a distinct JOB value; empty when JOB was unset.
    def retryJob
    if (env.JOB) {
      retryJob = "${env.JOB}-retry"
    } else {
      retryJob = ""
    }

    withEnv(["JOB=${retryJob}"]) {
      closure()
    }

    // Only reached when the second attempt did not throw.
    retryable.FLAKY_FAILURES.add([label: label, exception: firstFailure])

    unstable "${label} failed on the first attempt, but succeeded on the second. Marking it as flaky."
  }
}

0 comments on commit 44e03ff

Please sign in to comment.