Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/lib/python/kdoc/parse_data_structs.py
38186 views
1
#!/usr/bin/env python3
2
# SPDX-License-Identifier: GPL-2.0
3
# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <[email protected]>.
4
# pylint: disable=R0912,R0915
5
6
"""
7
Parse a source file or header, creating ReStructured Text cross references.
8
9
It accepts an optional file to change the default symbol reference or to
10
suppress symbols from the output.
11
12
It is capable of identifying defines, functions, structs, typedefs,
13
enums and enum symbols, and of creating cross-references for all of them.
14
It is also capable of distinguishing #defines used for specifying a Linux
15
ioctl.
16
17
The optional rules file contains a set of rules like:
18
19
ignore ioctl VIDIOC_ENUM_FMT
20
replace ioctl VIDIOC_DQBUF vidioc_qbuf
21
replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
22
"""
23
24
import os
25
import re
26
import sys
27
28
29
class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying defines, functions, structs, typedefs,
    enums and enum symbols, and of creating cross-references for all of
    them. It is also capable of distinguishing #defines used for
    specifying a Linux ioctl.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules:

        ignore <type> <symbol>

    Removes the symbol from reference generation.

    2. Replace rules:

        replace <type> <old_symbol> <new_reference>

    Replaces how old_symbol is referenced. The new_reference can be:

        - A simple symbol name;
        - A full Sphinx reference.

    3. Namespace rules

        namespace <namespace>

    Sets C namespace to be used during cross-reference generation. Can
    be overridden by replace rules.

    On ignore and replace rules, <type> can be:
        - ioctl: for defines that end with _IO*, e.g. ioctl definitions
        - define: for other defines
        - symbol: for symbols defined within enums;
        - typedef: for typedefs;
        - enum: for the name of a non-anonymous enum;
        - struct: for structs.

    Examples:

        ignore define __LINUX_MEDIA_H
        ignore ioctl VIDIOC_ENUM_FMT
        replace ioctl VIDIOC_DQBUF vidioc_qbuf
        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`

        namespace MC
    """

    # Parser regexes with multiple ways to capture enums and structs
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # FIXME: the original code was written a long time before Sphinx C
    # domain supported multiple namespaces. To avoid too much churn at
    # the existing hyperlinks, the code kept using "c:type" instead of
    # the right types. To change that, we need to change the types not
    # only here, but also at the uAPI media documentation.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """Initialize internal vars"""
        self.debug = debug
        self.data = ""

        # Maps each symbol type to a {symbol: (reference, line)} dict
        self.symbols = {}

        self.namespace = None
        self.ignore = []
        self.replace = []

        # Base name of the exceptions file. It is filled by
        # read_exceptions() and used by apply_exceptions() messages.
        self.exceptions_name = ""

        for symbol_type in self.DEF_SYMBOL_TYPES:
            self.symbols[symbol_type] = {}

    def read_exceptions(self, fname: str):
        """
        Read the rules from the exceptions file ``fname``.

        Ignore rules are appended to self.ignore, replace rules to
        self.replace, and a namespace rule sets self.namespace. Blank
        lines and lines starting with "#" are skipped. Nothing is done
        when ``fname`` is empty.

        Calls sys.exit() with a "file:line" message if a line doesn't
        match any of the known rules.
        """
        if not fname:
            return

        name = os.path.basename(fname)

        # Keep it, as apply_exceptions() needs it for its messages
        self.exceptions_name = name

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    self.ignore.append((ln, match.group(1), match.group(2)))
                    continue

                # replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if match:
                    self.replace.append((ln, match.group(1), match.group(2),
                                         match.group(3)))
                    continue

                # namespace rules
                match = re.match(r"^namespace\s+(\S+)", line)
                if match:
                    self.namespace = match.group(1)
                    continue

                sys.exit(f"{name}:{ln}: invalid line: {line}")

    def apply_exceptions(self):
        """
        Process exceptions file with rules to ignore or replace references.

        Calls sys.exit() if a rule uses a type that is not a key of
        DEF_SYMBOL_TYPES. A replace rule for a symbol that was not found
        only prints a warning.
        """

        # Rules may have been added without calling read_exceptions(),
        # so fall back to a placeholder instead of failing on messages.
        name = self.exceptions_name or "<exceptions>"

        # Handle ignore rules
        for ln, c_type, symbol in self.ignore:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            d = self.symbols[c_type]
            if symbol in d:
                del d[symbol]

        # Handle replace rules
        for ln, c_type, old, new in self.replace:
            if c_type not in self.DEF_SYMBOL_TYPES:
                sys.exit(f"{name}:{ln}: {c_type} is invalid")

            reftype = None

            # Parse reference type when the type is specified

            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
            if match:
                reftype = f":c:{match.group(1)}"
                new = match.group(2)
            else:
                match = re.search(r"(\:ref)\:\`(.+)\`", new)
                if match:
                    reftype = match.group(1)
                    new = match.group(2)

            # If the replacement rule doesn't have a type, get default
            if not reftype:
                reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
                if not reftype:
                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")

            new_ref = f"{reftype}:`{old} <{new}>`"

            # Change self.symbols to use the replacement rule, keeping
            # the line where the symbol was found.
            if old in self.symbols[c_type]:
                (_, symbol_ln) = self.symbols[c_type][old]
                self.symbols[c_type][old] = (new_ref, symbol_ln)
            else:
                print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def store_type(self, ln, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Stores a new symbol at self.symbols under symbol_type.

        The stored value is a (reference, ln) tuple, where the reference
        is built from the prefix, suffix and ref_type defined for
        symbol_type at DEF_SYMBOL_TYPES.

        If ref_name is not given, the lower-case symbol is used as the
        reference target. By default, for ":ref" references,
        underscores are replaced by "-".
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if ref_type or self.namespace:
            if not ref_name:
                ref_name = symbol.lower()

            # c-type references don't support hash
            if ref_type == ":ref" and replace_underscores:
                ref_name = ref_name.replace("_", "-")

            # C domain references may have namespaces. ref_type can be
            # None here when only the namespace is set.
            if ref_type and ref_type.startswith(":c:"):
                if self.namespace:
                    ref_name = f"{self.namespace}.{ref_name}"

            if ref_type:
                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
            else:
                ref_link = f"`{symbol} <{ref_name}>`"
        else:
            ref_link = symbol

        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)

    def store_line(self, line):
        """Stores a line at self.data, properly indented"""
        line = "    " + line.expandtabs()
        self.data += line.rstrip(" ")

    def parse_file(self, file_in: str, exceptions: str = None):
        """
        Reads a C source file and get identifiers.

        Every line of file_in is stored at self.data and the symbols
        found are stored at self.symbols, together with the zero-based
        index of the line where the statement ends. If an exceptions
        file is given, its rules are read before parsing and applied
        after it.
        """
        self.data = ""
        is_enum = False
        is_comment = False
        multiline = ""

        self.read_exceptions(exceptions)

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f):
                self.store_line(line)
                line = line.strip("\n")

                # Handle continuation lines: drop the trailing backslash
                # and join the text with the next line(s)
                if line.endswith("\\"):
                    multiline += line[:-1]
                    continue

                if multiline:
                    line = multiline + line
                    multiline = ""

                # Handle comments. They can be multilined
                if not is_comment:
                    if re.search(r"/\*.*", line):
                        is_comment = True
                    else:
                        # Strip C99-style comments
                        line = re.sub(r"(//.*)", "", line)

                if is_comment:
                    if re.search(r".*\*/", line):
                        is_comment = False
                    else:
                        multiline = line
                        continue

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments
                # With that, it can safely remove the entire comments,
                # and there's no need to use re.DOTALL for the logic below

                # Non-greedy, to keep the code placed between two
                # comments that are at the same line
                line = re.sub(r"(/\*.*?\*/)", "", line)
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {line_no + 1}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two parsing logic here due to:
                # 1. it makes easier to debug issues not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
                    if match:
                        self.store_type(line_no, "symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
                if match:
                    self.store_type(line_no, "ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
                if match:
                    self.store_type(line_no, "define", match.group(1))
                    continue

                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
                                 line)
                if match:
                    name = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type(line_no, "typedef", symbol, ref_name=name)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type(line_no, "enum", match.group(1))
                        is_enum = True
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type(line_no, "struct", match.group(1))
                        break

        self.apply_exceptions()

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, (ref, ln) in sorted(refs.items()):
                print(f"  #{ln:<5d} {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Generate the formatted output.

        Returns the content stored at self.data as a string, with
        special characters escaped and with each known symbol replaced
        by its cross-reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, (replacement, _) in ref_dict.items():
                # The text was already escaped, so escape the symbol the
                # same way before turning it into a regex
                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
                text = re.sub(fr'{start_delim}{symbol}{end_delim}',
                              fr'\1{replacement}\2', text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a list of symbols to be part of a TOC contents table.

        Returns a string with one section per non-empty symbol type,
        sorted by the type description, each one listing its symbols
        in alphabetical order.
        """
        text = []

        # Sort symbol types per description
        symbol_descriptions = []
        for k, v in self.DEF_SYMBOL_TYPES.items():
            symbol_descriptions.append((v['description'], k))

        symbol_descriptions.sort()

        # Process each category
        for description, c_type in symbol_descriptions:

            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            for _symbol, (ref, ln) in sorted(refs.items()):
                text.append(f"- LINENO_{ln}: {ref}")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write the ReST output to file_out.

        The base name of file_in is used as the document title. If toc
        is true, the content is the symbols list produced by gen_toc().
        Otherwise, it is the text produced by gen_output(), placed
        inside a parsed-literal block.
        """
        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)
483
484