mirror of https://github.com/apache/kafka.git
				
				
				
			
		
			
				
	
	
		
			219 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			219 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			Python
		
	
	
	
| # Licensed to the Apache Software Foundation (ASF) under one or more
 | |
| # contributor license agreements.  See the NOTICE file distributed with
 | |
| # this work for additional information regarding copyright ownership.
 | |
| # The ASF licenses this file to You under the Apache License, Version 2.0
 | |
| # (the "License"); you may not use this file except in compliance with
 | |
| # the License.  You may obtain a copy of the License at
 | |
| #
 | |
| #    http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| 
 | |
from collections import defaultdict
from io import BytesIO
import json
import logging
import os
import shlex
import subprocess
import sys
import tempfile
import textwrap
from typing import BinaryIO, Dict, Optional, TextIO
 | |
| 
 | |
# All diagnostics go through the root logger to STDERR at DEBUG level, which
# keeps STDOUT reserved for the short status-check message printed at the end.
handler = logging.StreamHandler(sys.stderr)
handler.setLevel(logging.DEBUG)

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)

# Markers prepended to each validation result message.
ok, err = "✅", "❌"
 | |
| 
 | |
| 
 | |
def get_env(key: str, fn = str) -> Optional:
    """
    Read the environment variable ``key`` and convert it with ``fn``.

    :param key: name of the environment variable to look up
    :param fn: callable applied to the raw string value (defaults to ``str``)
    :return: ``fn(value)`` when the variable is set, otherwise ``None``

    Both outcomes are reported through the module logger at DEBUG level.
    """
    raw = os.getenv(key)
    if raw is not None:
        logger.debug(f"Read env {key}: {raw}")
        return fn(raw)

    logger.debug(f"Could not find env {key}")
    return None
 | |
| 
 | |
| 
 | |
def has_approval(reviews) -> bool:
    """
    Tell whether the PR has been approved by someone with commit-level association.

    :param reviews: iterable of review dicts (the script passes the "reviews" array
        from ``gh pr view --json``); only the "authorAssociation" and "state" keys
        are consulted, and either may be absent
    :return: True if at least one review whose author association is "MEMBER" or
        "OWNER" has state "APPROVED", otherwise False
    """
    # any() stops at the first qualifying review instead of scanning the whole list.
    return any(
        review.get("authorAssociation") in ("MEMBER", "OWNER")
        and review.get("state") == "APPROVED"
        for review in reviews
    )
 | |
| 
 | |
| 
 | |
def write_commit(io: BinaryIO, title: str, body: str):
    """
    Write a commit message to a binary stream and flush it.

    The message is the title, one blank line, then the body, encoded as UTF-8.

    :param io: a writable *binary* stream (the callers in this script pass a
        ``NamedTemporaryFile`` and a ``BytesIO``); a text stream would reject the
        bytes written here
    :param title: first line of the commit message
    :param body: remainder of the commit message, written verbatim
    """
    io.write(f"{title}\n\n{body}".encode("utf-8"))
    # Flush so that another process reading the underlying file sees the content.
    io.flush()
 | |
| 
 | |
| 
 | |
def parse_trailers(title, body) -> Dict:
    """
    Extract the Git trailers (e.g. "Reviewers: ...") from a commit message.

    The title and body are written to a temporary file via ``write_commit`` and
    parsed with ``git interpret-trailers --trim-empty --parse``.

    :param title: commit title
    :param body: commit body
    :return: a ``defaultdict(list)`` mapping each trailer key to the list of its
        (whitespace-stripped) values, in the order git printed them. If git fails,
        the failure is logged and whatever was printed on STDOUT is still parsed.
    """
    trailers = defaultdict(list)

    with tempfile.NamedTemporaryFile() as fp:
        write_commit(fp, title, body)
        # Pass an argument list rather than a shell-style string so a temp path
        # containing whitespace or quotes cannot be split into several arguments.
        cmd = ["git", "interpret-trailers", "--trim-empty", "--parse", fp.name]
        p = subprocess.run(cmd, capture_output=True)

    if p.returncode != 0:
        # Do not fail the whole lint run, but make the cause of missing trailers visible.
        logger.warning(
            f"git interpret-trailers failed with exit code {p.returncode}. STDERR: {p.stderr.decode()}"
        )

    for line in p.stdout.decode().splitlines():
        key, sep, value = line.partition(":")
        if not sep:
            # Not a "key: value" line; nothing to record.
            continue
        trailers[key].append(value.strip())

    return trailers
 | |
| 
 | |
| 
 | |
def split_paragraphs(text: str):
    """
    Split the given text into a generator of (paragraph lines, "markdown" flag) tuples.

    Paragraphs are separated by one or more blank (whitespace-only) lines. Each
    paragraph is a list of its lines with line endings preserved.

    If any line of a paragraph starts with a markdown character ("#", "*", "-", "="
    or a digit), or contains a code fence, we will assume the whole paragraph
    contains markdown.

    Every yielded paragraph is an independent list, so the results may safely be
    collected (e.g. with ``list(...)``). Empty paragraphs are never yielded, so
    empty input or trailing blank lines produce no extra item.
    """
    paragraph = []
    markdown = False
    for line in text.splitlines(keepends=True):
        if line.strip() == "":
            if paragraph:
                yield paragraph, markdown
                # Start a new list instead of clearing the one just handed out,
                # otherwise the consumer's reference would be emptied as well.
                paragraph = []
                markdown = False
            continue

        if line[0] in ("#", "*", "-", "=") or line[0].isdigit() or "```" in line:
            markdown = True
        paragraph.append(line)

    if paragraph:
        yield paragraph, markdown
 | |
| 
 | |
| 
 | |
def _rewrap_body(body: str) -> str:
    """Re-wrap every paragraph of the PR body to 72 columns and return the new body."""
    new_paragraphs = []
    for lines, markdown in split_paragraphs(body):
        if not lines:
            # Nothing to wrap; avoids emitting a stray blank paragraph.
            continue
        if markdown:
            # If a paragraph looks like it has markdown in it, wrap each line separately.
            rewrapped = "\n".join(
                textwrap.fill(line, width=72, break_long_words=False, break_on_hyphens=False, replace_whitespace=False)
                for line in lines
            )
        else:
            # Continuation lines of the "Reviewers:" paragraph are indented by one space.
            indent = " " if lines[0].startswith("Reviewers:") else ""
            rewrapped = textwrap.fill("".join(lines), subsequent_indent=indent, width=72, break_long_words=False, break_on_hyphens=False, replace_whitespace=True)
        new_paragraphs.append(rewrapped + "\n")
    return "\n".join(new_paragraphs)


def main() -> int:
    """
    This script performs some basic linting of our PR titles and body. The PR number is read from the PR_NUMBER
    environment variable. Since this script expects to run on a GHA runner, it expects the "gh" tool to be installed.

    The STDOUT from this script is used as the status check message. It should not be too long. Use the logger for
    any necessary logging.

    Title checks:
    * Not too short (at least 15 characters)
    * Not too long (at most 120 characters)
    * Not truncated (ending with ...)
    * Starts with "KAFKA-", "MINOR", or "HOTFIX"

    Body checks:
    * Is not empty
    * Does not contain PR template text
    * Has "Reviewers:" trailer if the PR is approved

    When the GITHUB_ACTIONS environment variable is set, the PR body is also re-wrapped to 72 columns and
    written back with "gh pr edit".

    Returns the process exit code: 0 if every check passed, 1 otherwise.
    """
    pr_number = get_env("PR_NUMBER")
    if not pr_number:
        logger.error("PR_NUMBER environment variable is not set.")
        return 1

    # Argument lists (not shell-style strings) keep the env-provided value as a single argument.
    view = subprocess.run(["gh", "pr", "view", pr_number, "--json", "title,body,reviews"], capture_output=True)
    if view.returncode != 0:
        logger.error(f"GitHub CLI failed with exit code {view.returncode}.\nSTDOUT: {view.stdout.decode()}\nSTDERR:{view.stderr.decode()}")
        return 1

    gh_json = json.loads(view.stdout)
    title = gh_json["title"]
    body = gh_json["body"]
    reviews = gh_json["reviews"]

    checks = []  # (failed: bool, message)

    def check(positive_assertion, ok_msg, err_msg):
        if positive_assertion:
            checks.append((False, f"{ok} {ok_msg}"))
        else:
            checks.append((True, f"{err} {err_msg}"))

    # Check title
    check(not title.endswith("..."), "Title is not truncated", "Title appears truncated (ends with ...)")
    check(len(title) >= 15, "Title is not too short", "Title is too short (under 15 characters)")
    check(len(title) <= 120, "Title is not too long", "Title is too long (over 120 characters)")
    ok_prefix = title.startswith(("KAFKA-", "MINOR", "HOTFIX"))
    check(ok_prefix, "Title has expected KAFKA/MINOR/HOTFIX", "Title is missing KAFKA-XXXXX or MINOR/HOTFIX prefix")

    # Check body
    check(len(body) != 0, "Body is not empty", "Body is empty")
    check("Delete this text and replace" not in body, "PR template text not present", "PR template text should be removed")
    check("Committer Checklist" not in body, "PR template text not present", "Old PR template text should be removed")

    body = _rewrap_body(body)

    if get_env("GITHUB_ACTIONS"):
        with tempfile.NamedTemporaryFile() as fp:
            fp.write(body.encode())
            fp.flush()
            edit = subprocess.run(["gh", "pr", "edit", pr_number, "--body-file", fp.name], capture_output=True)
        if edit.returncode != 0:
            # Best effort: a failed reformat is logged but does not fail the lint run.
            logger.error(f"Could not update PR {pr_number}. STDOUT: {edit.stdout.decode()}\nSTDERR: {edit.stderr.decode()}")
    else:
        logger.info(f"Not reformatting {pr_number} since this is not running on GitHub Actions.")

    # Check for Reviewers
    if has_approval(reviews):
        trailers = parse_trailers(title, body)
        reviewers_in_body = trailers.get("Reviewers", [])
        check(len(reviewers_in_body) > 0, "Found 'Reviewers' in commit body", "Pull Request is approved, but no 'Reviewers' found in commit body")
        for reviewer_in_body in reviewers_in_body:
            logger.debug(reviewer_in_body)

    logger.debug("Commit will look like:\n")
    logger.debug("<pre>")
    preview = BytesIO()
    title += f" (#{pr_number})"
    write_commit(preview, title, body)
    logger.debug(preview.getvalue().decode())
    logger.debug("</pre>\n")

    logger.debug("Validation results:")
    for _, msg in checks:
        logger.debug(f"* {msg}")

    for failed, msg in checks:
        # Just output the first error for the status message. STDOUT becomes the status check message
        if failed:
            print(msg)
            return 1

    logger.debug("No validation errors, PR format looks good!")
    print("PR format looks good!")
    return 0


if __name__ == "__main__":
    sys.exit(main())
 |