Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/lib/python/abi/abi_parser.py
38186 views
1
#!/usr/bin/env python3
2
# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
3
# Copyright(c) 2025: Mauro Carvalho Chehab <[email protected]>.
4
# SPDX-License-Identifier: GPL-2.0
5
6
"""
7
Parse ABI documentation and produce results from it.
8
"""
9
10
from argparse import Namespace
11
import logging
12
import os
13
import re
14
15
from pprint import pformat
16
from random import randrange, seed
17
18
# Import Python modules
19
20
from abi.helpers import AbiDebug, ABI_DIR
21
22
23
class AbiParser:
24
"""Main class to parse ABI files"""
25
26
TAGS = r"(what|where|date|kernelversion|contact|description|users)"
27
XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
28
29
def __init__(self, directory, logger=None,
             enable_lineno=False, show_warnings=True, debug=0):
    """Keep the constructor arguments and set up the parser state"""

    # Settings provided by the caller
    self.debug = debug
    self.show_warnings = show_warnings
    self.enable_lineno = enable_lineno
    self.directory = directory

    # Use the "get_abi" logger unless the caller supplied one
    self.log = logger or logging.getLogger("get_abi")

    # Tables filled while the ABI files are parsed
    self.what_refs = {}
    self.file_refs = {}
    self.what_symbols = {}
    self.data = {}

    # Files ending with any of those suffixes are skipped
    self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")

    # Regular expressions used by the parser
    self.re_whitespace = re.compile(r"^\s+")
    self.re_start_spc = re.compile(r"(\s*)(\S.*)")
    self.re_valid = re.compile(self.TAGS)
    self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.IGNORECASE)
    self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)

    # Regular expressions used when producing output
    self.re_xref_node = re.compile(self.XREF)
    self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
    self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
    self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
    self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
    self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
    self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
def warn(self, fdata, msg, extra=None):
    """
    Log a parse warning prefixed with the file name and line number
    taken from fdata. Does nothing when warnings are disabled.
    """

    if self.show_warnings:
        text = f"{fdata.fname}:{fdata.ln}: {msg}"

        # Optional context (usually the offending line) goes on its own line
        if extra:
            text = text + "\n\t\t" + extra

        self.log.warning(text)
def add_symbol(self, what, fname, ln=None, xref=None):
    """
    Record that symbol 'what' is defined at file 'fname'.

    A non-zero line number is added to the list kept for that file
    (once), and a non-empty xref replaces the symbol's stored one.
    """

    entry = self.what_symbols.setdefault(what, {"file": {}})
    lines = entry["file"].setdefault(fname, [])

    if ln and ln not in lines:
        lines.append(ln)

    if xref:
        entry["xref"] = xref
def _parse_line(self, fdata, line):
    """
    Parse a single line of an ABI file.

    This is one step of a per-file state machine: the state lives in
    fdata (current tag, current key, list of "What" names, indentation
    of the description, file-level nametag entry) and the results are
    stored at self.data, self.what_symbols and self.what_refs.

    Arguments:
        fdata: per-file parsing state, updated in place.
        line:  raw line, as read from the file.
    """

    new_what = False
    new_tag = False
    content = None

    # A "name: content" line may start a new tag. re_tag splits it into
    # name, separator (':' plus following whitespace) and content.
    match = self.re_tag.match(line)
    if match:
        new = match.group(1).lower()
        sep = match.group(2)
        content = match.group(3)

        # NOTE(review): this is a search(), not a full match, so any name
        # that merely contains one of the TAGS words is accepted as a tag.
        match = self.re_valid.search(new)
        if match:
            new_tag = match.group(1)
        else:
            if fdata.tag == "description":
                # New "tag" is actually part of description.
                # Don't consider it a tag
                new_tag = False
            elif fdata.tag != "":
                # NOTE(review): the message reports the current tag
                # (fdata.tag), not the unrecognized name just found.
                self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)

    if new_tag:
        # "where" is Invalid, but was a common mistake. Warn if found
        if new_tag == "where":
            self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
            new_tag = "what"

        if new_tag == "what":
            # A new symbol resets the description indentation
            fdata.space = None

            # Register the line number only the first time a symbol is seen
            if content not in self.what_symbols:
                self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

            if fdata.tag == "what":
                # Consecutive "What:" lines describe the same entry
                fdata.what.append(content.strip("\n"))
            else:
                # A "What:" after other tags starts a new entry. Before
                # that, finish the previous one (if any).
                if fdata.key:
                    if "description" not in self.data.get(fdata.key, {}):
                        self.warn(fdata, f"{fdata.key} doesn't have a description")

                    for w in fdata.what:
                        self.add_symbol(what=w, fname=fdata.fname,
                                        ln=fdata.what_ln, xref=fdata.key)

                fdata.label = content
                new_what = True

                # Build the entry key: "abi_" + lowercase name, with each
                # run of chars matched by re_unprintable turned into "_"
                key = "abi_" + content.lower()
                fdata.key = self.re_unprintable.sub("_", key).strip("_")

                # Avoid duplicated keys but using a defined seed, to make
                # the namespace identical if there aren't changes at the
                # ABI symbols
                seed(42)

                while fdata.key in self.data:
                    # randrange(0, 51) yields 0..50, mapped to 'A'..'Z'
                    # and then to 'a'..'y'
                    char = randrange(0, 51) + ord("A")
                    if char > ord("Z"):
                        char += ord("a") - ord("Z") - 1

                    fdata.key += chr(char)

                if fdata.key and fdata.key not in self.data:
                    self.data[fdata.key] = {
                        "what": [content],
                        "file": [fdata.file_ref],
                        "path": fdata.ftype,
                        "line_no": fdata.ln,
                    }

                # From now on fdata.what aliases the list stored at self.data
                fdata.what = self.data[fdata.key]["what"]

            self.what_refs[content] = fdata.key
            fdata.tag = new_tag
            fdata.what_ln = fdata.ln

            # Link the symbol to the file-level entry
            if fdata.nametag["what"]:
                t = (content, fdata.key)
                if t not in fdata.nametag["symbols"]:
                    fdata.nametag["symbols"].append(t)

            return

        # Any other valid tag, once a first tag was already seen
        if fdata.tag and new_tag:
            fdata.tag = new_tag

            # NOTE(review): new_what is a local that is only set to True in
            # the "what" branch above, which always returns. So this block
            # looks unreachable -- confirm before relying on it.
            if new_what:
                fdata.label = ""

                if "description" in self.data[fdata.key]:
                    self.data[fdata.key]["description"] += "\n\n"

                if fdata.file_ref not in self.data[fdata.key]["file"]:
                    self.data[fdata.key]["file"].append(fdata.file_ref)

                if self.debug == AbiDebug.WHAT_PARSING:
                    self.log.debug("what: %s", fdata.what)

            if not fdata.what:
                self.warn(fdata, "'What:' should come first:", line)
                return

            if new_tag == "description":
                fdata.space = None

                if content:
                    # Replace "Description:" by spaces of the same width,
                    # so the indentation of the first line of text is
                    # comparable with the one of the following lines
                    sep = sep.replace(":", " ")

                    c = " " * len(new_tag) + sep + content
                    c = c.expandtabs()

                    match = self.re_start_spc.match(c)
                    if match:
                        # Preserve initial spaces for the first line
                        fdata.space = match.group(1)
                        content = match.group(2) + "\n"

            self.data[fdata.key][fdata.tag] = content

            return

    # Store any contents before tags at the database
    # NOTE(review): the check is for the presence of the "what" key at
    # nametag, which parse_file() always sets.
    if not fdata.tag and "what" in fdata.nametag:
        fdata.nametag["description"] += line
        return

    # Continuation line of a description: keep its relative indentation
    if fdata.tag == "description":
        content = line.expandtabs()

        # Whitespace-only lines become empty lines
        if self.re_whitespace.sub("", content) == "":
            self.data[fdata.key][fdata.tag] += "\n"
            return

        if fdata.space is None:
            match = self.re_start_spc.match(content)
            if match:
                # Preserve initial spaces for the first line
                fdata.space = match.group(1)

                content = match.group(2) + "\n"
        else:
            # Strip the indentation of the first line from this one. If
            # this line is less indented, stop stripping from now on.
            if content.startswith(fdata.space):
                content = content[len(fdata.space):]

            else:
                fdata.space = ""

        # NOTE(review): fdata.tag is "description" inside this branch, so
        # the "what" case below looks unreachable.
        if fdata.tag == "what":
            w = content.strip("\n")
            if w:
                self.data[fdata.key][fdata.tag].append(w)
        else:
            self.data[fdata.key][fdata.tag] += content
        return

    # Continuation line of any other tag
    content = line.strip()
    if fdata.tag:
        if fdata.tag == "what":
            w = content.strip("\n")
            if w:
                self.data[fdata.key][fdata.tag].append(w)
        else:
            self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
        return

    # Everything else is error
    if content:
        self.warn(fdata, "Unexpected content", line)
def parse_readme(self, nametag, fname):
    """
    Load the ABI README file into nametag["description"].

    Lines that begin with one of the valid ABI tags are prefixed with
    a newline and a colon; all other lines are appended unchanged.
    """

    nametag["what"] = ["Introduction"]
    nametag["path"] = "README"

    with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
        for line in fp:
            tag = self.re_tag.match(line)
            is_abi_tag = bool(tag) and bool(self.re_valid.search(tag.group(1).lower()))

            prefix = "\n:" if is_abi_tag else ""
            nametag["description"] += prefix + line
def parse_file(self, fname, path, basename):
    """
    Parse a single ABI file.

    Creates a file-level entry at self.data, then feeds each line of the
    file to the line parser. README files are handled by parse_readme().

    Arguments:
        fname:    name of the file to open.
        path:     directory of the file, relative to the ABI directory.
        basename: file name without directories.
    """

    ref = f"abi_file_{path}_{basename}"
    ref = self.re_unprintable.sub("_", ref).strip("_")

    # Store per-file state into a namespace variable. This will be used
    # by the per-line parser state machine and by the warning function.
    #
    # This must be a Namespace *instance*: binding the class itself would
    # store the state as class attributes, shared by every parsed file
    # and by any other user of argparse.Namespace.
    fdata = Namespace()

    fdata.fname = fname
    fdata.name = basename

    # Make the file name relative to the ABI directory, when possible
    pos = fname.find(ABI_DIR)
    if pos > 0:
        f = fname[pos:]
    else:
        f = fname

    fdata.file_ref = (f, ref)
    self.file_refs[f] = ref

    fdata.ln = 0
    fdata.what_ln = 0
    fdata.tag = ""
    fdata.label = ""
    fdata.what = []
    fdata.key = None
    fdata.xrefs = None
    fdata.space = None
    fdata.ftype = path.split("/")[0]

    # File-level entry: it holds the text found before the first tag
    # and the list of symbols defined at the file
    fdata.nametag = {}
    fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
    fdata.nametag["type"] = "File"
    fdata.nametag["path"] = fdata.ftype
    fdata.nametag["file"] = [fdata.file_ref]
    fdata.nametag["line_no"] = 1
    fdata.nametag["description"] = ""
    fdata.nametag["symbols"] = []

    self.data[ref] = fdata.nametag

    if self.debug & AbiDebug.WHAT_OPEN:
        self.log.debug("Opening file %s", fname)

    if basename == "README":
        self.parse_readme(fdata.nametag, fname)
        return

    with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
        for line in fp:
            fdata.ln += 1

            self._parse_line(fdata, line)

        if "description" in fdata.nametag:
            fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")

        # Finish the last entry of the file: the line parser only does
        # it when it finds the next "What:" group
        if fdata.key:
            if "description" not in self.data.get(fdata.key, {}):
                self.warn(fdata, f"{fdata.key} doesn't have a description")

            for w in fdata.what:
                self.add_symbol(what=w, fname=fname, xref=fdata.key)
def _parse_abi(self, root=None):
    """
    Internal function to parse documentation ABI recursively.

    Walks 'root' (self.directory when not given), calling parse_file()
    for each regular file. Hidden files and files ending with one of
    self.ignore_suffixes are skipped.
    """

    if not root:
        root = self.directory

    # os.scandir() returns entries in arbitrary order. Sort them, as the
    # keys generated for duplicated symbols depend on the parsing order,
    # and the output should be the same between runs.
    with os.scandir(root) as obj:
        entries = sorted(obj, key=lambda entry: entry.name)

    for entry in entries:
        name = os.path.join(root, entry.name)

        if entry.is_dir():
            self._parse_abi(name)
            continue

        if not entry.is_file():
            continue

        basename = os.path.basename(name)

        if basename.startswith("."):
            continue

        if basename.endswith(self.ignore_suffixes):
            continue

        # Path of the file, relative to the ABI directory
        path = self.re_abi_dir.sub("", os.path.dirname(name))

        self.parse_file(name, path, basename)
def parse_abi(self, root=None):
    """
    Parse the ABI documentation tree starting at 'root', and dump the
    parsed data at the debug log when that debug flag is set.
    """

    self._parse_abi(root)

    wants_dump = self.debug & AbiDebug.DUMP_ABI_STRUCTS
    if not wants_dump:
        return

    self.log.debug(pformat(self.data))
def desc_txt(self, desc):
    """
    Return the description as found inside ABI files, without the
    surrounding spaces, tabs and newlines, followed by an empty line.
    """

    trimmed = desc.strip(" \t\n")

    return "".join((trimmed, "\n\n"))
def xref(self, fname):
    """
    Return the ReST cross-reference label stored for a
    Documentation/ABI + basename file name, or None if there is none.
    """

    # Missing and empty labels are both reported as None
    return self.file_refs.get(fname) or None
def desc_rst(self, desc):
    """
    Enrich ReST output by creating cross-references.

    Title markups are dropped and, line by line, references to
    Documentation/*.rst files, to Documentation/ABI files and to known
    ABI symbols (like /sys/...) are converted into :doc: and :ref:
    roles.

    Returns the converted description, followed by empty lines.
    """

    # Remove title markups from the description
    # Having titles inside ABI files will only work if extra
    # care would be taken in order to strictly follow the same
    # level order for each markup.
    desc = self.re_title_mark.sub("\n\n", "\n" + desc)
    desc = desc.rstrip(" \t\n").lstrip("\n")

    # All substitutions below are done with the regular expressions
    # compiled at __init__, using a function to produce the replacement.
    # That avoids building a new expression from the matched text: such
    # text may contain regex special chars (wildcards are common on ABI
    # symbols), and a "\b" anchor never matches before the leading "/"
    # of a symbol that follows a space.

    def link_abi_file(match):
        """Convert a Documentation/ABI/<file> reference, if known"""

        abi = match.group(1) + match.group(2)

        xref = self.file_refs.get(abi)
        if not xref:
            # This may happen if ABI is on a separate directory,
            # like parsing ABI testing and symbol is at stable.
            # The proper solution is to move this part of the code
            # for it to be inside sphinx/kernel_abi.py
            self.log.info("Didn't find ABI reference for '%s'", abi)
            return match.group(0)

        new = self.re_escape.sub(r"\\\1", match.group(2))
        return f":ref:`{new} <{xref}>`"

    def link_symbol(match):
        """Convert an ABI symbol like /sys/..., if it is a known one"""

        symbol = match.group(1)

        # Finding ABI here is more complex due to wildcards
        xref = self.what_refs.get(symbol)
        if not xref:
            return match.group(0)

        new = self.re_escape.sub(r"\\\1", symbol)

        # The match also has the delimiters around the symbol: keep them
        whole = match.group(0)
        begin = match.start(1) - match.start(0)
        end = match.end(1) - match.start(0)

        return whole[:begin] + f":ref:`{new} <{xref}>`" + whole[end:]

    lines = []
    for d in desc.split("\n"):
        if d == "":
            lines.append("\n")
            continue

        # Use cross-references for doc files where needed
        d = self.re_doc.sub(r":doc:`/\1`", d)

        # Use cross-references for ABI generated docs where needed
        d = self.re_abi.sub(link_abi_file, d)

        # Seek for cross reference symbols like /sys/...
        # Need to be careful to avoid doing it on a code block
        if d[0] not in (" ", "\t"):
            d = self.re_xref_node.sub(link_symbol, d)

        lines.append(d + "\n")

    return "".join(lines) + "\n\n"
def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
        filter_path=None):
    """
    Generate the ABI documentation.

    This is a generator. For each entry at self.data, sorted by type
    and by "what", it yields a (text, file name, line number) tuple.

    Arguments:
        output_in_txt: use desc_txt() instead of desc_rst() to produce
                       the description.
        show_symbols:  include the entries that describe symbols.
        show_file:     include the entries that describe files.
        filter_path:   when set, only include entries whose "path" is
                       equal to it.
    """

    part = None
    for key, v in sorted(self.data.items(),
                         key=lambda x: (x[1].get("type", ""),
                                        x[1].get("what"))):

        wtype = v.get("type", "Symbol")
        file_ref = v.get("file")
        names = v.get("what", [""])

        if wtype == "File":
            if not show_file:
                continue
        elif not show_symbols:
            continue

        if filter_path and v.get("path") != filter_path:
            continue

        msg = ""

        if wtype != "File":
            # Group symbols by the first two levels of their names
            cur_part = names[0]
            if "/" in cur_part:
                match = self.re_what.match(cur_part)
                if match:
                    symbol = match.group(1).rstrip("/")
                    cur_part = "Symbols under " + symbol

            # Output a title only when the group changes
            if cur_part and cur_part != part:
                part = cur_part
                msg += part + "\n" + "-" * len(part) + "\n\n"

            msg += f".. _{key}:\n\n"

            # Build a new list with the names escaped and in bold. The
            # list stored at self.data must not be modified, as it is
            # used again at the next call and by search_symbols().
            names = ["**" + self.re_escape.sub(r"\\\1", name) + "**"
                     for name in names]

            max_len = max((len(name) for name in names), default=0)

            # One-column ReST grid table with the symbol names
            border = "+-" + "-" * max_len + "-+\n"

            msg += border
            for name in names:
                msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
                msg += border
            msg += "\n"

        for ref in file_ref:
            if wtype == "File":
                msg += f".. _{ref[1]}:\n\n"
            else:
                base = os.path.basename(ref[0])
                msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"

        if wtype == "File":
            msg += names[0] + "\n" + "-" * len(names[0]) + "\n\n"

        desc = v.get("description")
        if not desc and wtype != "File":
            msg += f"DESCRIPTION MISSING for {names[0]}\n\n"

        if desc:
            if output_in_txt:
                msg += self.desc_txt(desc)
            else:
                msg += self.desc_rst(desc)

        symbols = v.get("symbols")
        if symbols:
            msg += "Has the following ABI:\n\n"

            for w, label in symbols:
                # Escape special chars from content
                content = self.re_escape.sub(r"\\\1", w)

                msg += f"- :ref:`{content} <{label}>`\n\n"

        users = v.get("users")
        if users and users.strip(" \t\n"):
            users = users.strip("\n").replace('\n', '\n\t')
            msg += f"Users:\n\t{users}\n\n"

        ln = v.get("line_no", 1)

        yield (msg, file_ref[0][0], ln)
def check_issues(self):
    """
    Warn about duplicated ABI entries.

    A warning is logged for each symbol at self.what_symbols that is
    defined on more than one file, listing the files and, when known,
    the line numbers.
    """

    for what, v in self.what_symbols.items():
        files = v.get("file")
        if not files:
            # Should never happen if the parser works properly
            self.log.warning("%s doesn't have a file associated", what)
            continue

        if len(files) == 1:
            continue

        f = []
        for fname, lines in sorted(files.items()):
            if not lines:
                f.append(f"{fname}")
            elif len(lines) == 1:
                f.append(f"{fname}:{lines[0]}")
            else:
                # Keep a space between the file name and the word "lines"
                numbers = ", ".join(str(x) for x in lines)
                f.append(f"{fname} lines {numbers}")

        self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
def search_symbols(self, expr):
    """
    Search for ABI symbols and print the ones that were found.

    'expr' is a regular expression, matched case-insensitively against
    each "what" of the symbol entries (file entries are skipped). The
    entries are visited sorted by their keys. A message is printed if
    nothing matches.
    """

    regex = re.compile(expr, re.I)

    found_keys = 0
    # Sort by the entry key, so the output order is stable
    for _, v in sorted(self.data.items(), key=lambda x: x[0]):
        wtype = v.get("type", "")
        if wtype == "File":
            continue

        for what in v.get("what", [""]):
            if not regex.search(what):
                continue

            found_keys += 1

            kernelversion = v.get("kernelversion", "").strip(" \t\n")
            date = v.get("date", "").strip(" \t\n")
            contact = v.get("contact", "").strip(" \t\n")
            users = v.get("users", "").strip(" \t\n")
            desc = v.get("description", "").strip(" \t\n")

            files = [f[0] for f in v.get("file", ())]

            # Numbered title, underlined with the same length
            title = str(found_keys) + ". " + what
            title_tag = "-" * len(title)

            print(f"\n{title}\n{title_tag}\n")

            if kernelversion:
                print(f"Kernel version:\t\t{kernelversion}")

            if date:
                print(f"Date:\t\t\t{date}")

            if contact:
                print(f"Contact:\t\t{contact}")

            if users:
                print(f"Users:\t\t\t{users}")

            print("Defined on file(s):\t" + ", ".join(files))

            if desc:
                desc = desc.strip("\n")
                print(f"\n{desc}\n")

    if not found_keys:
        print(f"Regular expression /{expr}/ not found.")