Path: blob/master/misc/scripts/validate_codeowners.py
45997 views
#!/usr/bin/env python3

# This file is a command-line utility only; refuse to be imported.
if __name__ != "__main__":
    raise SystemExit(f'Utility script "{__file__}" should not be used as a module!')

import argparse
import re
import subprocess
import sys

# Make modules in the current working directory importable (`methods` is expected there).
sys.path.insert(0, "./")

try:
    from methods import print_error, print_info
except ImportError:
    raise SystemExit(f"Utility script {__file__} must be run from repository root!")


def glob_to_regex(glob: str) -> re.Pattern[str]:
    """Convert a CODEOWNERS glob to a RegEx pattern.

    The resulting pattern is anchored at both ends (``\\A`` ... ``\\Z``) and is
    meant to be matched against ``/``-separated paths.

    Args:
        glob: A single CODEOWNERS path pattern. A backslash escapes the
            character that follows it; a backslash at the very end of a
            segment is dropped.

    Returns:
        The compiled regular expression equivalent to ``glob``.

    Raises:
        SyntaxError: If ``glob`` contains ``***`` or is exactly ``/``.
        ValueError: If ``glob`` is empty.
    """

    # Heavily inspired by: https://github.com/hmarr/codeowners/blob/main/match.go

    # Handle specific edge cases first.
    if "***" in glob:
        raise SyntaxError("Pattern cannot contain three consecutive asterisks")
    if glob == "/":
        raise SyntaxError('Standalone "/" will not match anything')
    if not glob:
        raise ValueError("Empty pattern")

    segments = glob.split("/")
    if not segments[0]:
        # Leading slash; relative to root.
        segments = segments[1:]
    else:
        # Check for single-segment pattern, which matches relative to any descendent path.
        # This is equivalent to a leading `**/`.
        if len(segments) == 1 or (len(segments) == 2 and not segments[1]):
            if segments[0] != "**":
                segments.insert(0, "**")

    if len(segments) > 1 and not segments[-1]:
        # A trailing slash is equivalent to `/**`.
        segments[-1] = "**"

    last_index = len(segments) - 1
    # Tracks whether a "/" must be emitted before the next literal or `*` segment.
    need_slash = False
    pattern = r"\A"

    for index, segment in enumerate(segments):
        if segment == "**":
            if index == 0 and index == last_index:
                pattern += r".+"  # Pattern is just `**`; match everything.
            elif index == 0:
                pattern += r"(?:.+/)?"  # Pattern starts with `**`; match any leading path segment.
                need_slash = False
            elif index == last_index:
                pattern += r"/.*"  # Pattern ends with `**`; match any trailing path segment.
            else:
                pattern += r"(?:/.+)?"  # Pattern contains `**`; match zero or more path segments.
                need_slash = True

        elif segment == "*":
            if need_slash:
                pattern += "/"
            # Regular wildcard; match any non-separator characters.
            pattern += r"[^/]+"
            need_slash = True

        else:
            if need_slash:
                pattern += "/"

            escape = False
            for char in segment:
                if escape:
                    # Previous character was a backslash; take this one literally.
                    escape = False
                    pattern += re.escape(char)
                    continue
                elif char == "\\":
                    escape = True
                elif char == "*":
                    # Multi-character wildcard.
                    pattern += r"[^/]*"
                elif char == "?":
                    # Single-character wildcard.
                    pattern += r"[^/]"
                else:
                    # Regular character.
                    pattern += re.escape(char)

            if index == last_index:
                pattern += r"(?:/.*)?"  # No trailing slash; match descendent paths.
            need_slash = True

    pattern += r"\Z"
    return re.compile(pattern)


# One CODEOWNERS entry: a path pattern (`code`, may contain backslash-escaped spaces)
# followed by one or more owners. Lines starting with `#` do not match.
RE_CODEOWNERS = re.compile(r"^(?P<code>[^#](?:\\ |[^\s])+) +(?P<owners>(?:[^#][^\s]+ ?)+)")


def parse_codeowners() -> list[tuple[re.Pattern[str], list[str]]]:
    """Parse `.github/CODEOWNERS` into `(pattern, owners)` pairs.

    The path is relative to the current working directory. Lines that do not
    match `RE_CODEOWNERS` are skipped.

    Returns:
        The entries in reverse file order, so the first pair whose pattern
        matches a path is the entry that appears lowest in the file.

    Raises:
        SyntaxError, ValueError: Propagated from `glob_to_regex` for invalid patterns.
    """
    codeowners = []
    with open(".github/CODEOWNERS", encoding="utf-8", newline="\n") as file:
        for line in reversed(file.readlines()):  # Lower items have higher precedence.
            if match := RE_CODEOWNERS.match(line):
                codeowners.append((glob_to_regex(match["code"]), match["owners"].split()))
    return codeowners


def main() -> int:
    """Report the CODEOWNERS assignment of each requested file.

    Files come from the command line; when none are given, the output of
    `git ls-files` is used instead.

    Returns:
        The number of files with no matching CODEOWNERS entry, or 1 if
        `git ls-files` fails.
    """
    parser = argparse.ArgumentParser(description="Utility script for validating CODEOWNERS assignment.")
    parser.add_argument("files", nargs="*", help="A list of files to validate. If excluded, checks all owned files.")
    parser.add_argument("-u", "--unowned", action="store_true", help="Only output files without an owner.")
    args = parser.parse_args()

    files: list[str] = args.files
    if not files:
        listing = subprocess.run(["git", "ls-files"], text=True, capture_output=True)
        if listing.returncode != 0:
            # An ignored failure would leave `files` empty and report a false success.
            print_error(f"`git ls-files` failed with exit code {listing.returncode}: {listing.stderr.strip()}")
            return 1
        files = listing.stdout.splitlines()

    ret = 0
    codeowners = parse_codeowners()

    for file in files:
        matched = False
        for code, owners in codeowners:
            if code.match(file):
                matched = True
                if not args.unowned:
                    print_info(f"{file}: {owners}")
                break  # First match wins; entries are ordered by precedence.
        if not matched:
            print_error(f"{file}: <UNOWNED>")
            ret += 1

    return ret


try:
    # POSIX keeps only the low 8 bits of the exit status, so an unclamped count
    # of 256 (or any multiple) would be reported as success.
    raise SystemExit(min(main(), 255))
except KeyboardInterrupt:
    import os
    import signal

    # Restore the default SIGINT disposition and re-send the signal to this process.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    os.kill(os.getpid(), signal.SIGINT)