KAFKA-17479 Fail the whole pipeline if junit step times out [4/n] (#17121)

Fixes an issue where the CI workflow could appear to be successful when the JUnit step timed out and no tests had failed. Instead of relying on the GitHub Actions step timeout, this patch uses the Linux `timeout` command. This lets us capture the exit code and handle a timeout separately from a failed execution.

Reviewers: Chia-Ping Tsai <chia7712@gmail.com>
This commit is contained in:
David Arthur 2024-09-07 15:13:20 -04:00 committed by GitHub
parent 50e7022a1b
commit 040ae26472
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 31 additions and 15 deletions

View File

@ -37,10 +37,14 @@ FLAKY = "FLAKY ⚠️ "
SKIPPED = "SKIPPED 🙈"
def get_env(key: str) -> str:
def get_env(key: str, fn = str) -> Optional:
value = os.getenv(key)
logger.debug(f"Read env {key}: {value}")
return value
if value is None:
logger.debug(f"Could not find env {key}")
return None
else:
logger.debug(f"Read env {key}: {value}")
return fn(value)
@dataclasses.dataclass
@ -248,12 +252,23 @@ if __name__ == "__main__":
print(f"| {row_joined} |")
print("\n</details>")
logger.debug(summary)
if total_failures > 0:
logger.debug(f"Failing this step due to {total_failures} test failures")
exit(1)
elif total_errors > 0:
logger.debug(f"Failing this step due to {total_errors} test errors")
# Print special message if there was a timeout
exit_code = get_env("GRADLE_EXIT_CODE", int)
if exit_code == 124:
logger.debug(f"Gradle command timed out. These are partial results!")
logger.debug(summary)
logger.debug("Failing this step because the tests timed out.")
exit(1)
elif exit_code in (0, 1):
logger.debug(summary)
if total_failures > 0:
logger.debug(f"Failing this step due to {total_failures} test failures")
exit(1)
elif total_errors > 0:
logger.debug(f"Failing this step due to {total_errors} test errors")
exit(1)
else:
exit(0)
else:
exit(0)
logger.debug(f"Gradle had unexpected exit code {exit_code}. Failing this step")
exit(1)

View File

@ -107,17 +107,18 @@ jobs:
# --scan: Attempt to publish build scans in PRs. This will only work on PRs from apache/kafka, not public forks.
# --continue: Keep running even if a test fails
# -PcommitId Prevent the Git SHA being written into the jar files (which breaks caching)
timeout-minutes: 180 # 3 hours
continue-on-error: true
id: junit-test
run: |
./gradlew --build-cache --scan --continue \
set +e
timeout 180m ./gradlew --build-cache --scan --continue \
-PtestLoggingEvents=started,passed,skipped,failed \
-PmaxParallelForks=2 \
-PmaxTestRetries=1 -PmaxTestRetryFailures=10 \
-PcommitId=xxxxxxxxxxxxxxxx \
test
exitcode="$?"
echo "exitcode=$exitcode" >> $GITHUB_OUTPUT
- name: Archive JUnit reports
if: always()
uses: actions/upload-artifact@v4
id: junit-upload-artifact
with:
@ -126,8 +127,8 @@ jobs:
**/build/reports/tests/test/*
if-no-files-found: ignore
- name: Parse JUnit tests
if: always()
run: python .github/scripts/junit.py >> $GITHUB_STEP_SUMMARY
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
REPORT_URL: ${{ steps.junit-upload-artifact.outputs.artifact-url }}
GRADLE_EXIT_CODE: ${{ steps.junit-test.outputs.exitcode }}