From 040ae26472a61787d0c4962341d83a57fc36ee40 Mon Sep 17 00:00:00 2001 From: David Arthur Date: Sat, 7 Sep 2024 15:13:20 -0400 Subject: [PATCH] KAFKA-17479 Fail the whole pipeline if junit step times out [4/n] (#17121) Fixes an issue where the CI workflow could appear to be successful when the test step timed out and no tests had failed. Instead of using GitHub Actions' timeout, this patch makes use of the Linux `timeout` command. This lets us capture the exit code and handle timeouts separately from a failed execution. Reviewers: Chia-Ping Tsai --- .github/scripts/junit.py | 35 +++++++++++++++++++++++++---------- .github/workflows/build.yml | 11 ++++++----- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/.github/scripts/junit.py b/.github/scripts/junit.py index 1850c0c2e8a..2c63efe0be8 100644 --- a/.github/scripts/junit.py +++ b/.github/scripts/junit.py @@ -37,10 +37,14 @@ FLAKY = "FLAKY ⚠️ " SKIPPED = "SKIPPED 🙈" -def get_env(key: str) -> str: +def get_env(key: str, fn = str) -> Optional: value = os.getenv(key) - logger.debug(f"Read env {key}: {value}") - return value + if value is None: + logger.debug(f"Could not find env {key}") + return None + else: + logger.debug(f"Read env {key}: {value}") + return fn(value) @dataclasses.dataclass @@ -248,12 +252,23 @@ if __name__ == "__main__": print(f"| {row_joined} |") print("\n") - logger.debug(summary) - if total_failures > 0: - logger.debug(f"Failing this step due to {total_failures} test failures") - exit(1) - elif total_errors > 0: - logger.debug(f"Failing this step due to {total_errors} test errors") + # Print special message if there was a timeout + exit_code = get_env("GRADLE_EXIT_CODE", int) + if exit_code == 124: + logger.debug(f"Gradle command timed out. 
These are partial results!") + logger.debug(summary) + logger.debug("Failing this step because the tests timed out.") exit(1) + elif exit_code in (0, 1): + logger.debug(summary) + if total_failures > 0: + logger.debug(f"Failing this step due to {total_failures} test failures") + exit(1) + elif total_errors > 0: + logger.debug(f"Failing this step due to {total_errors} test errors") + exit(1) + else: + exit(0) else: - exit(0) + logger.debug(f"Gradle had unexpected exit code {exit_code}. Failing this step") + exit(1) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d6da2a94fa5..a1a94429148 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -107,17 +107,18 @@ jobs: # --scan: Attempt to publish build scans in PRs. This will only work on PRs from apache/kafka, not public forks. # --continue: Keep running even if a test fails # -PcommitId Prevent the Git SHA being written into the jar files (which breaks caching) - timeout-minutes: 180 # 3 hours - continue-on-error: true + id: junit-test run: | - ./gradlew --build-cache --scan --continue \ + set +e + timeout 180m ./gradlew --build-cache --scan --continue \ -PtestLoggingEvents=started,passed,skipped,failed \ -PmaxParallelForks=2 \ -PmaxTestRetries=1 -PmaxTestRetryFailures=10 \ -PcommitId=xxxxxxxxxxxxxxxx \ test + exitcode="$?" + echo "exitcode=$exitcode" >> $GITHUB_OUTPUT - name: Archive JUnit reports - if: always() uses: actions/upload-artifact@v4 id: junit-upload-artifact with: @@ -126,8 +127,8 @@ jobs: **/build/reports/tests/test/* if-no-files-found: ignore - name: Parse JUnit tests - if: always() run: python .github/scripts/junit.py >> $GITHUB_STEP_SUMMARY env: GITHUB_WORKSPACE: ${{ github.workspace }} REPORT_URL: ${{ steps.junit-upload-artifact.outputs.artifact-url }} + GRADLE_EXIT_CODE: ${{ steps.junit-test.outputs.exitcode }}