KAFKA-18792 Add workflow to check PR format (#18985)

Adds two new workflows to help us enforce some uniform PR structure. The
first workflow runs in an unprivileged context and simply captures the
PR number into a text file and archives it. This is then used by another
workflow that runs in a privileged context using the code in `trunk` to
actually do the validation.

The validation is done using a new Python script. This script fetches a
PR using the GitHub CLI and validates its structure. For now this just
includes the title and body, but could perform other non-code related
checks in the future.

This validation is needed for the up-coming merge queue functionality.

Reviewers: Justine Olshan <jolshan@confluent.io>
This commit is contained in:
David Arthur 2025-02-25 15:33:52 -05:00 committed by GitHub
parent df5839a9f4
commit 8be216b24f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 311 additions and 15 deletions

11
.github/pull_request_template.md vendored Normal file
View File

@ -0,0 +1,11 @@
Delete this text and replace it with a detailed description of your change. The
PR title and body will become the squashed commit message.
If you would like to tag individuals, add some commentary, upload images, or
include other supplemental information that should not be part of the eventual
commit message, please use a separate comment.
If applicable, please include a summary of the testing strategy (including
rationale) for the proposed change. Unit and/or integration tests are expected
for any behavior change and system tests should be considered for larger
changes.

154
.github/scripts/pr-format.py vendored Normal file
View File

@ -0,0 +1,154 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
import json
import logging
import os
import subprocess
import shlex
import sys
import tempfile
from typing import Dict, Optional
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(sys.stderr)
handler.setLevel(logging.DEBUG)
logger.addHandler(handler)
def get_env(key: str, fn = str) -> Optional:
value = os.getenv(key)
if value is None:
logger.debug(f"Could not find env {key}")
return None
else:
logger.debug(f"Read env {key}: {value}")
return fn(value)
def has_approval(reviews) -> bool:
for review in reviews:
logger.debug(f"Review: {review}")
if review.get("authorAssociation") not in ("MEMBER", "OWNER"):
continue
if review.get("state") == "APPROVED":
return True
return False
def parse_trailers(title, body) -> Dict:
trailers = defaultdict(list)
with tempfile.NamedTemporaryFile() as fp:
fp.write(title.encode())
fp.write(b"\n")
fp.write(body.encode())
fp.flush()
cmd = f"git interpret-trailers --trim-empty --parse {fp.name}"
p = subprocess.run(shlex.split(cmd), capture_output=True)
fp.close()
for line in p.stdout.decode().splitlines():
key, value = line.split(":", 1)
trailers[key].append(value.strip())
return trailers
if __name__ == "__main__":
"""
This script performs some basic linting of our PR titles and body. The PR number is read from the PR_NUMBER
environment variable. Since this script expects to run on a GHA runner, it expects the "gh" tool to be installed.
The STDOUT from this script is used as the status check message. It should not be too long. Use the logger for
any necessary logging.
Title checks:
* Not too short (at least 15 characters)
* Not too long (at most 120 characters)
* Not truncated (ending with ...)
* Starts with "KAFKA-", "MINOR", or "HOTFIX"
Body checks:
* Is not empty
* Has "Reviewers:" trailer if the PR is approved
"""
if not get_env("GITHUB_ACTIONS"):
print("This script is intended to by run by GitHub Actions.")
exit(1)
pr_number = get_env("PR_NUMBER")
cmd = f"gh pr view {pr_number} --json 'title,body,reviews'"
p = subprocess.run(shlex.split(cmd), capture_output=True)
if p.returncode != 0:
logger.error(f"GitHub CLI failed with exit code {p.returncode}.\nSTDOUT: {p.stdout.decode()}\nSTDERR:{p.stderr.decode()}")
exit(1)
gh_json = json.loads(p.stdout)
title = gh_json["title"]
body = gh_json["body"]
reviews = gh_json["reviews"]
warnings = []
errors = []
# Check title
if title.endswith("..."):
errors.append("Title appears truncated")
if len(title) < 15:
errors.append("Title is too short")
if len(title) > 120:
errors.append("Title is too long")
if not title.startswith("KAFKA-") and not title.startswith("MINOR") and not title.startswith("HOTFIX"):
errors.append("Title is missing KAFKA-XXXXX or MINOR/HOTFIX prefix")
# Check body
if len(body) == 0:
errors.append("Body is empty")
if "Delete this text and replace" in body:
errors.append("PR template text should be removed")
# Check for Reviewers
approved = has_approval(reviews)
if approved:
trailers = parse_trailers(title, body)
reviewers_in_body = trailers.get("Reviewers", [])
if len(reviewers_in_body) > 0:
logger.debug(f"Found 'Reviewers' in commit body")
for reviewer_in_body in reviewers_in_body:
logger.debug(reviewer_in_body)
else:
errors.append("Pull Request is approved, but no 'Reviewers' found in commit body")
for warning in warnings:
logger.debug(warning)
if len(errors) > 0:
for error in errors:
logger.debug(error)
# Just output the first error for the status message
print(errors[0])
exit(1)
else:
print("PR format looks good!")
exit(0)

View File

@ -106,6 +106,19 @@ if the PR has the `ci-approved` label
_The pr-labeled.yml workflow includes pull_request_target!_
### PR Linter
To help ensure good commit messages, we have added a "Pull Request Linter" job
that checks the title and body of the PR.
There are two files related to this workflow:
* [pr-reviewed.yml](pr-reviewed.yml) runs when a PR is reviewed or has its title
or body edited. This workflow simply captures the PR number into a text file
* [pr-linter.yml](pr-linter.yml) runs after pr-reviewed.yml and loads the PR
using the saved text file. This workflow runs the linter script that checks the
structure of the PR
### Stale PRs
This one is straightforward. Using the "actions/stale" GitHub Action, we automatically

90
.github/workflows/pr-linter.yml vendored Normal file
View File

@ -0,0 +1,90 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Pull Request Linter
on:
workflow_run:
workflows: [ Pull Request Reviewed ]
types:
- completed
jobs:
lint-pr:
if: github.event.workflow_run.conclusion == 'success'
runs-on: ubuntu-latest
steps:
- name: Env
run: printenv
env:
GITHUB_CONTEXT: ${{ toJson(github) }}
- name: Checkout code
uses: actions/checkout@v4
with:
persist-credentials: false
- name: Load PR Number
id: load-pr-number
uses: actions/download-artifact@v4
with:
github-token: ${{ github.token }}
run-id: ${{ github.event.workflow_run.id }}
name: PR_NUMBER.txt
- name: Handle missing artifact
if: ${{ steps.load-pr-number.outcome == 'failure' }}
uses: ./.github/actions/gh-api-update-status
with:
gh-token: ${{ secrets.GITHUB_TOKEN }}
repository: ${{ github.repository }}
commit_sha: ${{ github.event.workflow_run.head_sha }}
url: 'https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}'
description: 'Could not load PR number'
context: 'PR Linter'
state: 'error'
- name: Lint PR
id: lint-pr
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set +e
echo "Restored PR_NUMBER.txt:"
cat PR_NUMBER.txt
PR_NUMBER=$(cat PR_NUMBER.txt)
PR_NUMBER=$PR_NUMBER python .github/scripts/pr-format.py > pr-format-output.txt
exitcode="$?"
message=$(cat pr-format-output.txt)
echo "message=$message" >> "$GITHUB_OUTPUT"
exit $exitcode
- name: Handle lint failure
if: ${{ failure() && steps.lint-pr.outcome == 'failure' }}
uses: ./.github/actions/gh-api-update-status
with:
gh-token: ${{ secrets.GITHUB_TOKEN }}
repository: ${{ github.repository }}
commit_sha: ${{ github.event.workflow_run.head_sha }}
url: 'https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}'
description: ${{ steps.lint-pr.outputs.message }}
context: 'PR Linter'
state: 'failure'
- name: Handle lint success
if: ${{ steps.lint-pr.outcome == 'success' }}
uses: ./.github/actions/gh-api-update-status
with:
gh-token: ${{ secrets.GITHUB_TOKEN }}
repository: ${{ github.repository }}
commit_sha: ${{ github.event.workflow_run.head_sha }}
url: 'https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}'
description: ${{ steps.lint-pr.outputs.message }}
context: 'PR Linter'
state: 'success'

42
.github/workflows/pr-reviewed.yml vendored Normal file
View File

@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Pull Request Reviewed
on:
pull_request_review:
types: [submitted, edited, dismissed]
branches:
- trunk
pull_request:
types: [edited]
branches:
- trunk
jobs:
save-pr-number:
name: Save PR Number
runs-on: ubuntu-latest
steps:
- name: Env
run: printenv
env:
GITHUB_CONTEXT: ${{ toJson(github) }}
- name: Save PR Number
run: echo ${{ github.event.pull_request.number }} > PR_NUMBER.txt
- uses: actions/upload-artifact@v4
with:
name: PR_NUMBER.txt
path: PR_NUMBER.txt

View File

@ -1,14 +0,0 @@
*More detailed description of your change,
if necessary. The PR title and PR message become
the squashed commit message, so use a separate
comment to ping reviewers.*
*Summary of testing strategy (including rationale)
for the feature or bug fix. Unit and/or integration
tests are expected for any behaviour change and
system tests should be considered for larger changes.*
### Committer Checklist (excluded from commit message)
- [ ] Verify design and implementation
- [ ] Verify test coverage and CI build status
- [ ] Verify documentation (including upgrade notes)

View File

@ -275,8 +275,8 @@ if (repo != null) {
excludes.addAll([
'**/.git/**',
'**/build/**',
'.github/pull_request_template.md',
'CONTRIBUTING.md',
'PULL_REQUEST_TEMPLATE.md',
'gradlew',
'gradlew.bat',
'gradle/wrapper/gradle-wrapper.properties',