Path: blob/master/tools/lib/python/kdoc/parse_data_structs.py
38186 views
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <[email protected]>.
# pylint: disable=R0912,R0915

"""
Parse a source file or header, creating ReStructured Text cross references.

It accepts an optional file to change the default symbol reference or to
suppress symbols from the output.

It is capable of identifying defines, functions, structs, typedefs,
enums and enum symbols and create cross-references for all of them.
It is also capable of distinguishing #define used for specifying a Linux
ioctl.

The optional rules file contains a set of rules like:

    ignore ioctl VIDIOC_ENUM_FMT
    replace ioctl VIDIOC_DQBUF vidioc_qbuf
    replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
"""

import os
import re
import sys


class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying defines, functions, structs, typedefs,
    enums and enum symbols and create cross-references for all of them.
    It is also capable of distinguishing #define used for specifying a Linux
    ioctl.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules:

        ignore <type> <symbol>

    Removes the symbol from reference generation.

    2. Replace rules:

        replace <type> <old_symbol> <new_reference>

    Replaces old_symbol with a new reference. The new_reference can be:

        - A simple symbol name;
        - A full Sphinx reference.

    3. Namespace rules

        namespace <namespace>

    Sets C namespace to be used during cross-reference generation. Can
    be overridden by replace rules.

    On ignore and replace rules, <type> can be:
        - ioctl: for defines that end with _IO*, e.g. ioctl definitions
        - define: for other defines
        - symbol: for symbols defined within enums;
        - typedef: for typedefs;
        - enum: for the name of a non-anonymous enum;
        - struct: for structs.

    Examples:

        ignore define __LINUX_MEDIA_H
        ignore ioctl VIDIOC_ENUM_FMT
        replace ioctl VIDIOC_DQBUF vidioc_qbuf
        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`

        namespace MC
    """

    # Parser regexes with multiple ways to capture enums and structs
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # FIXME: the original code was written a long time before Sphinx C
    # domain to have multiple namespaces. To avoid too much churn at the
    # existing hyperlinks, the code kept using "c:type" instead of the
    # right types. To change that, we need to change the types not only
    # here, but also at the uAPI media documentation.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """
        Initialize internal vars.

        ``debug`` enables debug_print(); a value greater than 1 also makes
        parse_file() echo every pre-processed line.
        """
        self.debug = debug
        self.data = ""

        # One dict per symbol type: symbol -> (reference text, line number)
        self.symbols = {symbol_type: {} for symbol_type in self.DEF_SYMBOL_TYPES}

        self.namespace = None
        self.ignore = []
        self.replace = []

        # Basename of the rules file, kept so that apply_exceptions() can
        # report problems using the same "file:line:" prefix used by
        # read_exceptions().
        self._exceptions_name = ""

    def read_exceptions(self, fname: str):
        """
        Read the rules file, queueing ignore/replace rules and setting the
        namespace. Does nothing when ``fname`` is empty.

        Blank lines and lines starting with "#" are skipped. Any other line
        that is not a valid rule aborts via sys.exit().
        """
        if not fname:
            return

        name = os.path.basename(fname)
        self._exceptions_name = name

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    self.ignore.append((ln, match.group(1), match.group(2)))
                    continue

                # replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if match:
                    self.replace.append((ln, match.group(1), match.group(2),
                                         match.group(3)))
                    continue

                # namespace rules
                match = re.match(r"^namespace\s+(\S+)", line)
                if match:
                    self.namespace = match.group(1)
                    continue

                sys.exit(f"{name}:{ln}: invalid line: {line}")

    def apply_exceptions(self):
        """
        Process exceptions file with rules to ignore or replace references.

        Exits via sys.exit() when a rule uses an unknown symbol type and
        prints a warning when a replace rule targets an unknown symbol.
        """
        # Rules may have been queued without read_exceptions(); use a
        # placeholder so messages still have a "file:line:" shape.
        name = self._exceptions_name or "<exceptions>"

        # Handle ignore rules
        for ln, c_type, symbol in self.ignore:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            self.symbols[c_type].pop(symbol, None)

        # Handle replace rules
        for ln, c_type, old, new in self.replace:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            reftype = None

            # Parse reference type when the type is specified
            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
            if match:
                reftype = f":c:{match.group(1)}"
                new = match.group(2)
            else:
                match = re.search(r"(\:ref)\:\`(.+)\`", new)
                if match:
                    reftype = match.group(1)
                    new = match.group(2)

            # If the replacement rule doesn't have a type, get default
            if not reftype:
                reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")

            new_ref = f"{reftype}:`{old} <{new}>`"

            # Change self.symbols to use the replacement rule, keeping the
            # line number where the symbol was found at the parsed file.
            if old in self.symbols[c_type]:
                (_, symbol_ln) = self.symbols[c_type][old]
                self.symbols[c_type][old] = (new_ref, symbol_ln)
            else:
                print(f"{name}:{ln}: Warning: can't find {old} {c_type}",
                      file=sys.stderr)

    def store_type(self, ln, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Stores a new symbol at self.symbols under symbol_type.

        ``ln`` is stored together with the generated reference. When
        ``ref_name`` is not given, the lower-cased symbol is used as the
        reference target.

        By default, underscores are replaced by "-" on ":ref" targets.
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if ref_type or self.namespace:
            if not ref_name:
                ref_name = symbol.lower()

            # Only ":ref" targets get hyphens; other targets keep the name
            if ref_type == ":ref" and replace_underscores:
                ref_name = ref_name.replace("_", "-")

            # C domain references may have namespaces. ref_type can be
            # unset here when only a namespace is defined.
            if ref_type and ref_type.startswith(":c:") and self.namespace:
                ref_name = f"{self.namespace}.{ref_name}"

            if ref_type:
                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
            else:
                ref_link = f"`{symbol} <{ref_name}>`"
        else:
            ref_link = symbol

        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)

    def store_line(self, line):
        """Stores a line at self.data, properly indented"""
        # NOTE(review): the indent literal is reproduced as seen; whitespace
        # may have been collapsed before review - confirm against upstream.
        line = " " + line.expandtabs()
        self.data += line.rstrip(" ")

    def parse_file(self, file_in: str, exceptions: str = None):
        """
        Reads a C source file and get identifiers.

        Every line is stored verbatim via store_line(). Symbols found are
        stored with the zero-based index of the line that completed the
        statement. The rules from ``exceptions``, if any, are read before
        parsing and applied at the end.
        """
        self.data = ""
        is_enum = False
        is_comment = False
        multiline = ""

        self.read_exceptions(exceptions)

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f):
                self.store_line(line)
                line = line.strip("\n")

                # Handle continuation lines: drop the trailing backslash
                # and join with the next physical line
                if line.endswith("\\"):
                    multiline += line[:-1]
                    continue

                if multiline:
                    line = multiline + line
                    multiline = ""

                # Handle comments. They can be multilined
                if not is_comment:
                    if re.search(r"/\*.*", line):
                        is_comment = True
                    else:
                        # Strip C99-style comments
                        line = re.sub(r"(//.*)", "", line)

                if is_comment:
                    if re.search(r".*\*/", line):
                        is_comment = False
                    else:
                        multiline = line
                        continue

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments
                # With that, it can safely remove the entire comments,
                # and there's no need to use re.DOTALL for the logic below
                line = re.sub(r"(/\*.*\*/)", "", line)
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {line_no + 1}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two parsing logic here due to:
                # 1. it makes easier to debug issues not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
                    if match:
                        self.store_type(line_no, "symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
                if match:
                    self.store_type(line_no, "ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
                if match:
                    self.store_type(line_no, "define", match.group(1))
                    continue

                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
                                 line)
                if match:
                    name = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type(line_no, "typedef", symbol, ref_name=name)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type(line_no, "enum", match.group(1))
                        is_enum = True
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type(line_no, "struct", match.group(1))
                        break

        self.apply_exceptions()

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            # NOTE(review): spacing inside this format string is reproduced
            # as seen; confirm against upstream.
            for symbol, (ref, ln) in sorted(refs.items()):
                print(f" #{ln:<5d} {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return the stored file content with Sphinx special characters
        escaped and every known symbol replaced by its cross-reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, (replacement, _) in ref_dict.items():
                # The text was already escaped above, so the symbol has to
                # be escaped the same way before being used as a pattern.
                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
                text = re.sub(fr'{start_delim}{symbol}{end_delim}',
                              fr'\1{replacement}\2', text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a list of symbols to be part of a TOC contents table
        """
        text = []

        # Sort symbol types per description
        symbol_descriptions = sorted(
            (v['description'], k) for k, v in self.DEF_SYMBOL_TYPES.items()
        )

        # Process each category
        for description, c_type in symbol_descriptions:
            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            for _symbol, (ref, ln) in sorted(refs.items()):
                text.append(f"- LINENO_{ln}: {ref}")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write an RST file at ``file_out`` titled with the basename of
        ``file_in``: the TOC when ``toc`` is true, otherwise the
        cross-referenced file content inside a parsed-literal block.
        """
        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)