KAFKA-17607: Add CI step to verify LICENSE-binary (#18299)

Add a CI step that runs the committer-tools/verify_license.py script (with --skip-build) to detect dependencies missing from LICENSE-binary.

Reviewers: Chia-Ping Tsai <chia7712@gmail.com>, Ken Huang <s7133700@gmail.com>, David Arthur <mumrah@gmail.com>
This commit is contained in:
xijiu 2025-03-07 22:45:23 +08:00 committed by GitHub
parent 40db001588
commit a6064e0800
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 5 deletions

View File

@ -140,7 +140,7 @@ jobs:
# --scan: Publish the build scan. This will only work on PRs from apache/kafka and trunk # --scan: Publish the build scan. This will only work on PRs from apache/kafka and trunk
# --no-scan: For public fork PRs, we won't attempt to publish the scan # --no-scan: For public fork PRs, we won't attempt to publish the scan
run: | run: |
./gradlew --build-cache --info $SCAN_ARG check siteDocTar -x test ./gradlew --build-cache --info $SCAN_ARG check releaseTarGz -x test
- name: Archive check reports - name: Archive check reports
if: always() if: always()
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
@ -170,6 +170,8 @@ jobs:
find ./site-docs/generated -type f -exec grep -L "." {} \; >&2 find ./site-docs/generated -type f -exec grep -L "." {} \; >&2
exit 1 exit 1
fi fi
- name: Verify license file
run: python committer-tools/verify_license.py --skip-build
test: test:
needs: [configure, validate, load-catalog] needs: [configure, validate, load-catalog]

View File

@ -24,6 +24,7 @@ import sys
import tarfile import tarfile
import tempfile import tempfile
import subprocess import subprocess
import argparse
# Constant: Regex to extract dependency tokens from the LICENSE file. # Constant: Regex to extract dependency tokens from the LICENSE file.
# Matches lines that start with a dash and then a dependency token of the form: # Matches lines that start with a dash and then a dependency token of the form:
@ -44,7 +45,7 @@ def get_tarball_path(project_dir):
print("Error: Distributions directory not found:", distributions_dir) print("Error: Distributions directory not found:", distributions_dir)
sys.exit(1) sys.exit(1)
pattern = re.compile(r'^kafka_2\.13-.+\.tgz$', re.IGNORECASE) pattern = re.compile(r'^kafka_2\.13-(?!.*docs).+\.tgz$', re.IGNORECASE)
candidates = [ candidates = [
os.path.join(distributions_dir, f) os.path.join(distributions_dir, f)
for f in os.listdir(distributions_dir) for f in os.listdir(distributions_dir)
@ -74,12 +75,20 @@ def get_license_deps(license_text):
return set(LICENSE_DEP_PATTERN.findall(license_text)) return set(LICENSE_DEP_PATTERN.findall(license_text))
def main(): def main():
# Argument parser
parser = argparse.ArgumentParser(description="Whether to skip executing ReleaseTarGz.")
parser.add_argument("--skip-build", action="store_true", help="skip the build")
args = parser.parse_args()
# Assume the current working directory is the project root. # Assume the current working directory is the project root.
project_dir = os.getcwd() project_dir = os.getcwd()
print("Using project directory:", project_dir) print("Using project directory:", project_dir)
# Build the tarball. if args.skip_build:
run_gradlew(project_dir) print("Skip running './gradlew clean releaseTarGz'")
else:
# Build the tarball.
run_gradlew(project_dir)
tarball = get_tarball_path(project_dir) tarball = get_tarball_path(project_dir)
print("Tarball created at:", tarball) print("Tarball created at:", tarball)