mirror of https://github.com/apache/kafka.git
				
				
				
			
		
			
	
	
		
			142 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
		
		
			
		
	
	
			142 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
|  | #!/usr/bin/env python3 | ||
|  | # -*- coding: utf-8 -*- | ||
|  | 
 | ||
|  | # | ||
|  | # Licensed to the Apache Software Foundation (ASF) under one or more | ||
|  | # contributor license agreements.  See the NOTICE file distributed with | ||
|  | # this work for additional information regarding copyright ownership. | ||
|  | # The ASF licenses this file to You under the Apache License, Version 2.0 | ||
|  | # (the "License"); you may not use this file except in compliance with | ||
|  | # the License.  You may obtain a copy of the License at | ||
|  | # | ||
|  | #    http://www.apache.org/licenses/LICENSE-2.0 | ||
|  | # | ||
|  | # Unless required by applicable law or agreed to in writing, software | ||
|  | # distributed under the License is distributed on an "AS IS" BASIS, | ||
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
|  | # See the License for the specific language governing permissions and | ||
|  | # limitations under the License. | ||
|  | # | ||
|  | 
 | ||
|  | import os | ||
|  | import re | ||
|  | import sys | ||
|  | import tarfile | ||
|  | import tempfile | ||
|  | import subprocess | ||
|  | 
 | ||
# Regex used to pull dependency tokens out of the LICENSE text.
#
# A match starts at the beginning of a line (re.MULTILINE), skips optional
# whitespace, a dash, and more optional whitespace, and then captures (group 1)
# a token of the form:
#   DependencyName-x.y, DependencyName-x.y.z, or DependencyName-x.y.z.w
# The name part may contain letters, digits, and the characters "_", ".", "+"
# and "-". At most one trailing suffix introduced by "-" or "." and made of
# letters/digits (e.g., "-alpha") is included in the captured token.
LICENSE_DEP_PATTERN = re.compile(
    r'^\s*-\s*([A-Za-z0-9_.+-]+-[0-9]+\.[0-9]+(?:\.[0-9]+){0,2}(?:[-.][A-Za-z0-9]+)?)',
    re.MULTILINE
)
|  | 
 | ||
def run_gradlew(project_dir):
    """Build the release tarball by invoking the Gradle wrapper.

    Args:
        project_dir: Directory in which ``./gradlew`` is executed.

    Raises:
        subprocess.CalledProcessError: If the Gradle command exits with a
            non-zero status.
    """
    command = ["./gradlew", "clean", "releaseTarGz"]
    print("Running './gradlew clean releaseTarGz'")
    subprocess.run(command, cwd=project_dir, check=True)
|  | 
 | ||
def get_tarball_path(project_dir):
    """Return the path of the most recently modified Kafka release tarball.

    Looks in ``<project_dir>/core/build/distributions`` for files whose name
    matches ``kafka_2.13-*.tgz`` (case-insensitively).

    Args:
        project_dir: Root directory of the project.

    Returns:
        Full path of the matching tarball with the latest modification time.

    Exits the process with status 1 (after printing an error) when the
    distributions directory does not exist or contains no matching tarball.
    """
    dist_dir = os.path.join(project_dir, "core", "build", "distributions")
    if not os.path.isdir(dist_dir):
        print("Error: Distributions directory not found:", dist_dir)
        sys.exit(1)

    tarball_name = re.compile(r'^kafka_2\.13-.+\.tgz$', re.IGNORECASE)
    matching = []
    for entry in os.listdir(dist_dir):
        if tarball_name.match(entry):
            matching.append(os.path.join(dist_dir, entry))

    if not matching:
        print("Error: No tarball matching 'kafka_2.13-*.tgz' found in:", dist_dir)
        sys.exit(1)

    # Several builds may have left tarballs behind; the newest one wins.
    return max(matching, key=os.path.getmtime)
|  | 
 | ||
|  | def extract_tarball(tarball, extract_dir): | ||
|  |     with tarfile.open(tarball, "r:gz") as tar: | ||
|  |         # Use a filter to avoid future deprecation warnings. | ||
|  |         tar.extractall(path=extract_dir, filter=lambda tarinfo, dest: tarinfo) | ||
|  |     print("Tarball extracted to:", extract_dir) | ||
|  | 
 | ||
def get_libs_set(libs_dir):
    """Collect the names of third-party jars found in ``libs_dir``.

    Args:
        libs_dir: Directory whose entries are inspected.

    Returns:
        A set of file names with the ``.jar`` extension removed. Entries that
        do not end in ``.jar`` are ignored, as are jars whose name contains
        "kafka", "connect" or "trogdor" (case-insensitively).
    """
    suffix = ".jar"
    project_jar = re.compile(r"(kafka|connect|trogdor)", re.IGNORECASE)

    dependencies = set()
    for entry in os.listdir(libs_dir):
        if not entry.endswith(suffix):
            continue
        if project_jar.search(entry):
            continue
        dependencies.add(entry[:-len(suffix)])
    return dependencies
|  | 
 | ||
def get_license_deps(license_text):
    """Return the set of dependency tokens listed in the LICENSE text.

    Args:
        license_text: Full contents of the LICENSE file as a string.

    Returns:
        A set containing the first capture group of every
        ``LICENSE_DEP_PATTERN`` match in ``license_text``.
    """
    return {
        match.group(1)
        for match in LICENSE_DEP_PATTERN.finditer(license_text)
    }
|  | 
 | ||
def main():
    """Build the release tarball and compare its jars with its LICENSE file.

    Steps, using the current working directory as the project root:
      1. Run the Gradle build and locate the resulting tarball.
      2. Extract the tarball into a temporary directory.
      3. Read the dependency names from the ``libs`` directory and from the
         ``LICENSE`` file of the extracted tree.
      4. Print both lists and the differences between them.

    Exits with status 1 when the extracted tree is empty, when ``LICENSE`` or
    ``libs`` is missing, or when the two dependency sets differ.
    """

    def print_deps(header, deps):
        # Print a header line followed by the sorted dependency names.
        print(header)
        for name in sorted(deps):
            print(" -", name)

    # The script is expected to be launched from the project root.
    root = os.getcwd()
    print("Using project directory:", root)

    # Produce the tarball and find it on disk.
    run_gradlew(root)
    tarball = get_tarball_path(root)
    print("Tarball created at:", tarball)

    # Everything below works on a throw-away copy of the tarball contents.
    with tempfile.TemporaryDirectory() as workdir:
        extract_tarball(tarball, workdir)
        entries = os.listdir(workdir)
        if not entries:
            print("Error: No directory found after extraction.")
            sys.exit(1)
        unpacked = os.path.join(workdir, entries[0])
        print("Tarball extracted to:", unpacked)

        # Both the LICENSE file and the libs directory must be present.
        license_path = os.path.join(unpacked, "LICENSE")
        libs_dir = os.path.join(unpacked, "libs")
        if not (os.path.exists(license_path) and os.path.exists(libs_dir)):
            print("Error: LICENSE file or libs directory not found in the extracted project.")
            sys.exit(1)

        with open(license_path, "r", encoding="utf-8") as license_file:
            license_text = license_file.read()

        # Dependency names as seen from each side.
        shipped = get_libs_set(libs_dir)
        declared = get_license_deps(license_text)

        print_deps("\nDependencies from libs (extracted from jar names):", shipped)
        print_deps("\nDependencies extracted from LICENSE file:", declared)

        # Differences in each direction.
        undeclared = shipped.difference(declared)
        stale = declared.difference(shipped)

        if undeclared:
            print_deps(
                "\nThe following libs (from ./libs) are missing in the LICENSE file. These should be added to the LICENSE-binary file:",
                undeclared,
            )
        else:
            print("\nAll libs from ./libs are present in the LICENSE file.")

        if stale:
            print_deps(
                "\nThe following entries are in the LICENSE file but not present in ./libs. These should be removed from the LICENSE-binary file:",
                stale,
            )
        else:
            print("\nNo extra dependencies in the LICENSE file.")

        # Any mismatch makes the check fail.
        if undeclared or stale:
            sys.exit(1)
|  | 
 | ||
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|  | 
 |