mirror of https://github.com/apache/kafka.git
				
				
				
			
		
			
				
	
	
		
			142 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			142 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
| #!/usr/bin/env python3
 | |
| # -*- coding: utf-8 -*-
 | |
| 
 | |
| #
 | |
| # Licensed to the Apache Software Foundation (ASF) under one or more
 | |
| # contributor license agreements.  See the NOTICE file distributed with
 | |
| # this work for additional information regarding copyright ownership.
 | |
| # The ASF licenses this file to You under the Apache License, Version 2.0
 | |
| # (the "License"); you may not use this file except in compliance with
 | |
| # the License.  You may obtain a copy of the License at
 | |
| #
 | |
| #    http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| #
 | |
| 
 | |
| import os
 | |
| import re
 | |
| import sys
 | |
| import tarfile
 | |
| import tempfile
 | |
| import subprocess
 | |
| 
 | |
# Regex that pulls dependency tokens out of the LICENSE text.
# A token sits on a line beginning with a dash (leading whitespace allowed)
# and has the shape Name-x.y, Name-x.y.z or Name-x.y.z.w, optionally followed
# by one "-suffix" or ".suffix" (for example "-alpha"). Only the token itself
# is captured. The pieces below concatenate into a single pattern string.
LICENSE_DEP_PATTERN = re.compile(
    r'^\s*-\s*'                 # list-item dash at the start of a line
    r'('                        # start of the captured dependency token
    r'[A-Za-z0-9_.+-]+'         # dependency name
    r'-[0-9]+\.[0-9]+'          # "-major.minor"
    r'(?:\.[0-9]+){0,2}'        # up to two further numeric components
    r'(?:[-.][A-Za-z0-9]+)?'    # optional trailing qualifier
    r')',                       # end of the captured token
    flags=re.MULTILINE,
)
 | |
| 
 | |
def run_gradlew(project_dir):
    """Run ``./gradlew clean releaseTarGz`` with *project_dir* as the working directory.

    ``check=True`` makes a non-zero Gradle exit status raise
    ``subprocess.CalledProcessError``.
    """
    print("Running './gradlew clean releaseTarGz'")
    command = ["./gradlew", "clean", "releaseTarGz"]
    subprocess.run(command, cwd=project_dir, check=True)
 | |
| 
 | |
def get_tarball_path(project_dir):
    """Return the most recently modified release tarball of the build.

    Looks in ``<project_dir>/core/build/distributions`` for files named
    ``kafka_2.13-*.tgz`` (matched case-insensitively) and returns the full
    path of the one with the latest modification time.

    Prints an error and exits with status 1 when the distributions directory
    does not exist or holds no matching tarball.
    """
    dist_dir = os.path.join(project_dir, "core", "build", "distributions")
    if not os.path.isdir(dist_dir):
        print("Error: Distributions directory not found:", dist_dir)
        sys.exit(1)

    name_re = re.compile(r'^kafka_2\.13-.+\.tgz$', re.IGNORECASE)
    matching = [
        os.path.join(dist_dir, entry)
        for entry in filter(name_re.match, os.listdir(dist_dir))
    ]
    if not matching:
        print("Error: No tarball matching 'kafka_2.13-*.tgz' found in:", dist_dir)
        sys.exit(1)

    # Several builds may have left tarballs behind; the newest one wins.
    return max(matching, key=os.path.getmtime)
 | |
| 
 | |
def extract_tarball(tarball, extract_dir):
    """Extract the gzip-compressed tar archive *tarball* into *extract_dir*.

    Uses tarfile's built-in ``"data"`` extraction filter when the running
    interpreter provides extraction filters. That keeps the safety checks
    (no absolute paths, no members escaping the destination) and avoids the
    deprecation warning for filter-less extraction. On interpreters without
    filter support, the ``filter`` keyword would raise ``TypeError``, so a
    plain ``extractall`` is used there instead.

    Args:
        tarball: Path to the ``.tgz`` archive to unpack.
        extract_dir: Directory that receives the archive contents.
    """
    with tarfile.open(tarball, "r:gz") as tar:
        # Feature detection recommended by the tarfile documentation.
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=extract_dir, filter="data")
        else:
            tar.extractall(path=extract_dir)
    print("Tarball extracted to:", extract_dir)
 | |
| 
 | |
def get_libs_set(libs_dir):
    """Return the names of the third-party jars found in *libs_dir*.

    Each ``*.jar`` entry contributes its file name without the ``.jar``
    suffix. Entries whose name contains "kafka", "connect" or "trogdor"
    (in any letter case) are left out.
    """
    suffix = ".jar"
    excluded = re.compile(r"(kafka|connect|trogdor)", re.IGNORECASE)

    dependency_names = set()
    for entry in os.listdir(libs_dir):
        if not entry.endswith(suffix):
            continue
        if excluded.search(entry):
            continue
        dependency_names.add(entry[:-len(suffix)])
    return dependency_names
 | |
| 
 | |
def get_license_deps(license_text):
    """Return the set of dependency tokens that LICENSE_DEP_PATTERN finds in *license_text*."""
    return {
        match.group(1)
        for match in LICENSE_DEP_PATTERN.finditer(license_text)
    }
 | |
| 
 | |
def _print_deps(header, deps):
    """Print *header*, then one `` - name`` line per dependency in sorted order."""
    print(header)
    for dep in sorted(deps):
        print(" -", dep)


def main():
    """Build the release tarball and compare its jars with its LICENSE file.

    Steps:
      1. Treat the current working directory as the project root and run the
         Gradle build that produces the release tarball.
      2. Extract the newest tarball into a temporary directory.
      3. Collect the jar names from ``libs`` and the dependency tokens from
         ``LICENSE`` inside the extracted tree.
      4. Print both sets and the differences between them.

    Exits with status 1 when the extracted tree has no top-level directory,
    when ``LICENSE`` or ``libs`` is missing, or when the two sets differ.
    """
    # Assume the current working directory is the project root.
    project_dir = os.getcwd()
    print("Using project directory:", project_dir)

    # Build the tarball.
    run_gradlew(project_dir)
    tarball = get_tarball_path(project_dir)
    print("Tarball created at:", tarball)

    # Extract the tarball into a temporary directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        extract_tarball(tarball, tmp_dir)

        # os.listdir returns entries in arbitrary order and may include plain
        # files, so keep only directories and pick deterministically.
        extracted_dirs = sorted(
            entry
            for entry in os.listdir(tmp_dir)
            if os.path.isdir(os.path.join(tmp_dir, entry))
        )
        if not extracted_dirs:
            print("Error: No directory found after extraction.")
            sys.exit(1)
        extracted = os.path.join(tmp_dir, extracted_dirs[0])
        print("Tarball extracted to:", extracted)

        # Locate the LICENSE file and libs directory.
        license_path = os.path.join(extracted, "LICENSE")
        libs_dir = os.path.join(extracted, "libs")
        if not os.path.isfile(license_path) or not os.path.isdir(libs_dir):
            print("Error: LICENSE file or libs directory not found in the extracted project.")
            sys.exit(1)

        with open(license_path, "r", encoding="utf-8") as f:
            license_text = f.read()

        # Get dependency sets.
        libs = get_libs_set(libs_dir)
        license_deps = get_license_deps(license_text)

        _print_deps("\nDependencies from libs (extracted from jar names):", libs)
        _print_deps("\nDependencies extracted from LICENSE file:", license_deps)

        # Compare the sets.
        missing_in_license = libs - license_deps
        extra_in_license = license_deps - libs

        if missing_in_license:
            _print_deps(
                "\nThe following libs (from ./libs) are missing in the LICENSE file. These should be added to the LICENSE-binary file:",
                missing_in_license,
            )
        else:
            print("\nAll libs from ./libs are present in the LICENSE file.")

        if extra_in_license:
            _print_deps(
                "\nThe following entries are in the LICENSE file but not present in ./libs. These should be removed from the LICENSE-binary file:",
                extra_in_license,
            )
        else:
            print("\nNo extra dependencies in the LICENSE file.")

        # A non-zero exit status signals that the two sets disagree.
        if missing_in_license or extra_in_license:
            sys.exit(1)
 | |
| 
 | |
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
 | |
| 
 |