CoCalc -- abi_parser.py

GitHub Repository: torvalds/linux
Path: blob/master/tools/lib/python/abi/abi_parser.py
⁴⁹⁶⁸⁴ views
1
#!/usr/bin/env python3
2
# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
3
# Copyright(c) 2025: Mauro Carvalho Chehab <[email protected]>.
4
# SPDX-License-Identifier: GPL-2.0
5

6
"""
7
Parse ABI documentation and produce results from it.
8
"""
9

10
from argparse import Namespace
11
import logging
12
import os
13
import re
14

15
from pprint import pformat
16
from random import randrange, seed
17

18
# Import Python modules
19

20
from abi.helpers import AbiDebug, ABI_DIR
21

22

23
class AbiParser:
24
    """Main class to parse ABI files"""
25

26
    # Alternation with the tag names accepted by the parser. "where" is listed
    # only so that it can be reported as a mistake and then handled as "what".
    TAGS = r"(what|where|date|kernelversion|contact|description|users)"
27
    # Captures a /sys, /config, /proc, /dev or /kvd path that comes after the
    # start of the text, whitespace or "(" and runs up to the next ",.:;)",
    # whitespace or end of the text.
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
28

29
    def __init__(self, directory, logger=None,
                 enable_lineno=False, show_warnings=True, debug=0):
        """
        Keep the constructor arguments and set up the parser state.

        When no logger is given, the "get_abi" logger is used. The result
        tables start empty and all regular expressions used by the class
        are compiled here, once.
        """

        rx = re.compile

        self.directory = directory
        self.enable_lineno = enable_lineno
        self.show_warnings = show_warnings
        self.debug = debug

        self.log = logger if logger else logging.getLogger("get_abi")

        # Tables filled while parsing
        self.data = {}
        self.what_symbols = {}
        self.file_refs = {}
        self.what_refs = {}

        # File name endings that make a file be skipped
        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")

        # Expressions used while parsing
        self.re_abi_dir = rx(r"(.*)" + ABI_DIR)
        self.re_tag = rx(r"(\S+)(:\s*)(.*)", re.I)
        self.re_valid = rx(self.TAGS)
        self.re_start_spc = rx(r"(\s*)(\S.*)")
        self.re_whitespace = rx(r"^\s+")

        # Expressions used while producing output
        self.re_what = rx(r"(\/?(?:[\w\-]+\/?){1,2})")
        self.re_escape = rx(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
        self.re_unprintable = rx(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
        self.re_title_mark = rx(r"\n[\-\*\=\^\~]+\n")
        self.re_doc = rx(r"Documentation/(?!devicetree)(\S+)\.rst")
        self.re_abi = rx(r"(Documentation/ABI/)([\w\/\-]+)")
        self.re_xref_node = rx(self.XREF)
66

67
    def warn(self, fdata, msg, extra=None):
68
        """Displays a parse error if warning is enabled"""
69

70
        if not self.show_warnings:
71
            return
72

73
        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
74
        if extra:
75
            msg += "\n\t\t" + extra
76

77
        self.log.warning(msg)
78

79
    def add_symbol(self, what, fname, ln=None, xref=None):
80
        """Create a reference table describing where each 'what' is located"""
81

82
        if what not in self.what_symbols:
83
            self.what_symbols[what] = {"file": {}}
84

85
        if fname not in self.what_symbols[what]["file"]:
86
            self.what_symbols[what]["file"][fname] = []
87

88
        if ln and ln not in self.what_symbols[what]["file"][fname]:
89
            self.what_symbols[what]["file"][fname].append(ln)
90

91
        if xref:
92
            self.what_symbols[what]["xref"] = xref
93

94
    def _parse_line(self, fdata, line):
95
        """Parse a single line of an ABI file"""
96

97
        new_what = False
98
        new_tag = False
99
        content = None
100

101
        match = self.re_tag.match(line)
102
        if match:
103
            new = match.group(1).lower()
104
            sep = match.group(2)
105
            content = match.group(3)
106

107
            match = self.re_valid.search(new)
108
            if match:
109
                new_tag = match.group(1)
110
            else:
111
                if fdata.tag == "description":
112
                    # New "tag" is actually part of description.
113
                    # Don't consider it a tag
114
                    new_tag = False
115
                elif fdata.tag != "":
116
                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)
117

118
        if new_tag:
119
            # "where" is Invalid, but was a common mistake. Warn if found
120
            if new_tag == "where":
121
                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
122
                new_tag = "what"
123

124
            if new_tag == "what":
125
                fdata.space = None
126

127
                if content not in self.what_symbols:
128
                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)
129

130
                if fdata.tag == "what":
131
                    fdata.what.append(content.strip("\n"))
132
                else:
133
                    if fdata.key:
134
                        if "description" not in self.data.get(fdata.key, {}):
135
                            self.warn(fdata, f"{fdata.key} doesn't have a description")
136

137
                        for w in fdata.what:
138
                            self.add_symbol(what=w, fname=fdata.fname,
139
                                            ln=fdata.what_ln, xref=fdata.key)
140

141
                    fdata.label = content
142
                    new_what = True
143

144
                    key = "abi_" + content.lower()
145
                    fdata.key = self.re_unprintable.sub("_", key).strip("_")
146

147
                    # Avoid duplicated keys but using a defined seed, to make
148
                    # the namespace identical if there aren't changes at the
149
                    # ABI symbols
150
                    seed(42)
151

152
                    while fdata.key in self.data:
153
                        char = randrange(0, 51) + ord("A")
154
                        if char > ord("Z"):
155
                            char += ord("a") - ord("Z") - 1
156

157
                        fdata.key += chr(char)
158

159
                    if fdata.key and fdata.key not in self.data:
160
                        self.data[fdata.key] = {
161
                            "what": [content],
162
                            "file": [fdata.file_ref],
163
                            "path": fdata.ftype,
164
                            "line_no": fdata.ln,
165
                        }
166

167
                    fdata.what = self.data[fdata.key]["what"]
168

169
                self.what_refs[content] = fdata.key
170
                fdata.tag = new_tag
171
                fdata.what_ln = fdata.ln
172

173
                if fdata.nametag["what"]:
174
                    t = (content, fdata.key)
175
                    if t not in fdata.nametag["symbols"]:
176
                        fdata.nametag["symbols"].append(t)
177

178
                return
179

180
            if fdata.tag and new_tag:
181
                fdata.tag = new_tag
182

183
                if new_what:
184
                    fdata.label = ""
185

186
                    if "description" in self.data[fdata.key]:
187
                        self.data[fdata.key]["description"] += "\n\n"
188

189
                    if fdata.file_ref not in self.data[fdata.key]["file"]:
190
                        self.data[fdata.key]["file"].append(fdata.file_ref)
191

192
                    if self.debug == AbiDebug.WHAT_PARSING:
193
                        self.log.debug("what: %s", fdata.what)
194

195
                if not fdata.what:
196
                    self.warn(fdata, "'What:' should come first:", line)
197
                    return
198

199
                if new_tag == "description":
200
                    fdata.space = None
201

202
                    if content:
203
                        sep = sep.replace(":", " ")
204

205
                        c = " " * len(new_tag) + sep + content
206
                        c = c.expandtabs()
207

208
                        match = self.re_start_spc.match(c)
209
                        if match:
210
                            # Preserve initial spaces for the first line
211
                            fdata.space = match.group(1)
212
                            content = match.group(2) + "\n"
213

214
                self.data[fdata.key][fdata.tag] = content
215

216
            return
217

218
        # Store any contents before tags at the database
219
        if not fdata.tag and "what" in fdata.nametag:
220
            fdata.nametag["description"] += line
221
            return
222

223
        if fdata.tag == "description":
224
            content = line.expandtabs()
225

226
            if self.re_whitespace.sub("", content) == "":
227
                self.data[fdata.key][fdata.tag] += "\n"
228
                return
229

230
            if fdata.space is None:
231
                match = self.re_start_spc.match(content)
232
                if match:
233
                    # Preserve initial spaces for the first line
234
                    fdata.space = match.group(1)
235

236
                    content = match.group(2) + "\n"
237
            else:
238
                if content.startswith(fdata.space):
239
                    content = content[len(fdata.space):]
240

241
                else:
242
                    fdata.space = ""
243

244
            if fdata.tag == "what":
245
                w = content.strip("\n")
246
                if w:
247
                    self.data[fdata.key][fdata.tag].append(w)
248
            else:
249
                self.data[fdata.key][fdata.tag] += content
250
            return
251

252
        content = line.strip()
253
        if fdata.tag:
254
            if fdata.tag == "what":
255
                w = content.strip("\n")
256
                if w:
257
                    self.data[fdata.key][fdata.tag].append(w)
258
            else:
259
                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
260
            return
261

262
        # Everything else is error
263
        if content:
264
            self.warn(fdata, "Unexpected content", line)
265

266
    def parse_readme(self, nametag, fname):
267
        """Parse ABI README file"""
268

269
        nametag["what"] = ["Introduction"]
270
        nametag["path"] = "README"
271
        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
272
            for line in fp:
273
                match = self.re_tag.match(line)
274
                if match:
275
                    new = match.group(1).lower()
276

277
                    match = self.re_valid.search(new)
278
                    if match:
279
                        nametag["description"] += "\n:" + line
280
                        continue
281

282
                nametag["description"] += line
283

284
    def parse_file(self, fname, path, basename):
285
        """Parse a single file"""
286

287
        ref = f"abi_file_{path}_{basename}"
288
        ref = self.re_unprintable.sub("_", ref).strip("_")
289

290
        # Store per-file state into a namespace variable. This will be used
291
        # by the per-line parser state machine and by the warning function.
292
        fdata = Namespace
293

294
        fdata.fname = fname
295
        fdata.name = basename
296

297
        pos = fname.find(ABI_DIR)
298
        if pos > 0:
299
            f = fname[pos:]
300
        else:
301
            f = fname
302

303
        fdata.file_ref = (f, ref)
304
        self.file_refs[f] = ref
305

306
        fdata.ln = 0
307
        fdata.what_ln = 0
308
        fdata.tag = ""
309
        fdata.label = ""
310
        fdata.what = []
311
        fdata.key = None
312
        fdata.xrefs = None
313
        fdata.space = None
314
        fdata.ftype = path.split("/")[0]
315

316
        fdata.nametag = {}
317
        fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
318
        fdata.nametag["type"] = "File"
319
        fdata.nametag["path"] = fdata.ftype
320
        fdata.nametag["file"] = [fdata.file_ref]
321
        fdata.nametag["line_no"] = 1
322
        fdata.nametag["description"] = ""
323
        fdata.nametag["symbols"] = []
324

325
        self.data[ref] = fdata.nametag
326

327
        if self.debug & AbiDebug.WHAT_OPEN:
328
            self.log.debug("Opening file %s", fname)
329

330
        if basename == "README":
331
            self.parse_readme(fdata.nametag, fname)
332
            return
333

334
        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
335
            for line in fp:
336
                fdata.ln += 1
337

338
                self._parse_line(fdata, line)
339

340
            if "description" in fdata.nametag:
341
                fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
342

343
            if fdata.key:
344
                if "description" not in self.data.get(fdata.key, {}):
345
                    self.warn(fdata, f"{fdata.key} doesn't have a description")
346

347
                for w in fdata.what:
348
                    self.add_symbol(what=w, fname=fname, xref=fdata.key)
349

350
    def _parse_abi(self, root=None):
351
        """Internal function to parse documentation ABI recursively"""
352

353
        if not root:
354
            root = self.directory
355

356
        with os.scandir(root) as obj:
357
            for entry in obj:
358
                name = os.path.join(root, entry.name)
359

360
                if entry.is_dir():
361
                    self._parse_abi(name)
362
                    continue
363

364
                if not entry.is_file():
365
                    continue
366

367
                basename = os.path.basename(name)
368

369
                if basename.startswith("."):
370
                    continue
371

372
                if basename.endswith(self.ignore_suffixes):
373
                    continue
374

375
                path = self.re_abi_dir.sub("", os.path.dirname(name))
376

377
                self.parse_file(name, path, basename)
378

379
    def parse_abi(self, root=None):
        """
        Parse the ABI documentation found under ``root``.

        When the DUMP_ABI_STRUCTS debug bit is set, the parsed data is
        sent to the debug log afterwards.
        """

        self._parse_abi(root)

        dump_requested = self.debug & AbiDebug.DUMP_ABI_STRUCTS
        if dump_requested:
            self.log.debug(pformat(self.data))
386

387
    def desc_txt(self, desc):
        """
        Return the description as found inside ABI files, without the
        surrounding spaces, tabs and newlines, followed by a blank line.
        """

        return desc.strip(" \t\n") + "\n\n"
393

394
    def xref(self, fname):
395
        """
396
        Converts a Documentation/ABI + basename into a ReST cross-reference
397
        """
398

399
        xref = self.file_refs.get(fname)
400
        if not xref:
401
            return None
402
        else:
403
            return xref
404

405
    def desc_rst(self, desc):
406
        """Enrich ReST output by creating cross-references"""
407

408
        # Remove title markups from the description
409
        # Having titles inside ABI files will only work if extra
410
        # care would be taken in order to strictly follow the same
411
        # level order for each markup.
412
        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
413
        desc = desc.rstrip(" \t\n").lstrip("\n")
414

415
        # Python's regex performance for non-compiled expressions is a lot
416
        # than Perl, as Perl automatically caches them at their
417
        # first usage. Here, we'll need to do the same, as otherwise the
418
        # performance penalty is be high
419

420
        new_desc = ""
421
        for d in desc.split("\n"):
422
            if d == "":
423
                new_desc += "\n"
424
                continue
425

426
            # Use cross-references for doc files where needed
427
            d = self.re_doc.sub(r":doc:`/\1`", d)
428

429
            # Use cross-references for ABI generated docs where needed
430
            matches = self.re_abi.findall(d)
431
            for m in matches:
432
                abi = m[0] + m[1]
433

434
                xref = self.file_refs.get(abi)
435
                if not xref:
436
                    # This may happen if ABI is on a separate directory,
437
                    # like parsing ABI testing and symbol is at stable.
438
                    # The proper solution is to move this part of the code
439
                    # for it to be inside sphinx/kernel_abi.py
440
                    self.log.info("Didn't find ABI reference for '%s'", abi)
441
                else:
442
                    new = self.re_escape.sub(r"\\\1", m[1])
443
                    d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
444

445
            # Seek for cross reference symbols like /sys/...
446
            # Need to be careful to avoid doing it on a code block
447
            if d[0] not in [" ", "\t"]:
448
                matches = self.re_xref_node.findall(d)
449
                for m in matches:
450
                    # Finding ABI here is more complex due to wildcards
451
                    xref = self.what_refs.get(m)
452
                    if xref:
453
                        new = self.re_escape.sub(r"\\\1", m)
454
                        d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
455

456
            new_desc += d + "\n"
457

458
        return new_desc + "\n\n"
459

460
    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
461
            filter_path=None):
462
        """Print ABI at stdout"""
463

464
        part = None
465
        for key, v in sorted(self.data.items(),
466
                             key=lambda x: (x[1].get("type", ""),
467
                                            x[1].get("what"))):
468

469
            wtype = v.get("type", "Symbol")
470
            file_ref = v.get("file")
471
            names = v.get("what", [""])
472

473
            if wtype == "File":
474
                if not show_file:
475
                    continue
476
            else:
477
                if not show_symbols:
478
                    continue
479

480
            if filter_path:
481
                if v.get("path") != filter_path:
482
                    continue
483

484
            msg = ""
485

486
            if wtype != "File":
487
                cur_part = names[0]
488
                if cur_part.find("/") >= 0:
489
                    match = self.re_what.match(cur_part)
490
                    if match:
491
                        symbol = match.group(1).rstrip("/")
492
                        cur_part = "Symbols under " + symbol
493

494
                if cur_part and cur_part != part:
495
                    part = cur_part
496
                    msg += part + "\n"+ "-" * len(part) +"\n\n"
497

498
                msg += f".. _{key}:\n\n"
499

500
                max_len = 0
501
                for i in range(0, len(names)):           # pylint: disable=C0200
502
                    names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"
503

504
                    max_len = max(max_len, len(names[i]))
505

506
                msg += "+-" + "-" * max_len + "-+\n"
507
                for name in names:
508
                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
509
                    msg += "+-" + "-" * max_len + "-+\n"
510
                msg += "\n"
511

512
            for ref in file_ref:
513
                if wtype == "File":
514
                    msg += f".. _{ref[1]}:\n\n"
515
                else:
516
                    base = os.path.basename(ref[0])
517
                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"
518

519
            if wtype == "File":
520
                msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"
521

522
            desc = v.get("description")
523
            if not desc and wtype != "File":
524
                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"
525

526
            if desc:
527
                if output_in_txt:
528
                    msg += self.desc_txt(desc)
529
                else:
530
                    msg += self.desc_rst(desc)
531

532
            symbols = v.get("symbols")
533
            if symbols:
534
                msg += "Has the following ABI:\n\n"
535

536
                for w, label in symbols:
537
                    # Escape special chars from content
538
                    content = self.re_escape.sub(r"\\\1", w)
539

540
                    msg += f"- :ref:`{content} <{label}>`\n\n"
541

542
            users = v.get("users")
543
            if users and users.strip(" \t\n"):
544
                users = users.strip("\n").replace('\n', '\n\t')
545
                msg += f"Users:\n\t{users}\n\n"
546

547
            ln = v.get("line_no", 1)
548

549
            yield (msg, file_ref[0][0], ln)
550

551
    def check_issues(self):
552
        """Warn about duplicated ABI entries"""
553

554
        for what, v in self.what_symbols.items():
555
            files = v.get("file")
556
            if not files:
557
                # Should never happen if the parser works properly
558
                self.log.warning("%s doesn't have a file associated", what)
559
                continue
560

561
            if len(files) == 1:
562
                continue
563

564
            f = []
565
            for fname, lines in sorted(files.items()):
566
                if not lines:
567
                    f.append(f"{fname}")
568
                elif len(lines) == 1:
569
                    f.append(f"{fname}:{lines[0]}")
570
                else:
571
                    m = fname + "lines "
572
                    m += ", ".join(str(x) for x in lines)
573
                    f.append(m)
574

575
            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
576

577
    def search_symbols(self, expr):
578
        """ Searches for ABI symbols """
579

580
        regex = re.compile(expr, re.I)
581

582
        found_keys = 0
583
        for t in sorted(self.data.items(), key=lambda x: [0]):
584
            v = t[1]
585

586
            wtype = v.get("type", "")
587
            if wtype == "File":
588
                continue
589

590
            for what in v.get("what", [""]):
591
                if regex.search(what):
592
                    found_keys += 1
593

594
                    kernelversion = v.get("kernelversion", "").strip(" \t\n")
595
                    date = v.get("date", "").strip(" \t\n")
596
                    contact = v.get("contact", "").strip(" \t\n")
597
                    users = v.get("users", "").strip(" \t\n")
598
                    desc = v.get("description", "").strip(" \t\n")
599

600
                    files = []
601
                    for f in v.get("file", ()):
602
                        files.append(f[0])
603

604
                    what = str(found_keys) + ". " + what
605
                    title_tag = "-" * len(what)
606

607
                    print(f"\n{what}\n{title_tag}\n")
608

609
                    if kernelversion:
610
                        print(f"Kernel version:\t\t{kernelversion}")
611

612
                    if date:
613
                        print(f"Date:\t\t\t{date}")
614

615
                    if contact:
616
                        print(f"Contact:\t\t{contact}")
617

618
                    if users:
619
                        print(f"Users:\t\t\t{users}")
620

621
                    print("Defined on file(s):\t" + ", ".join(files))
622

623
                    if desc:
624
                        desc = desc.strip("\n")
625
                        print(f"\n{desc}\n")
626

627
        if not found_keys:
628
            print(f"Regular expression /{expr}/ not found.")
629

630
Product

Resources

Company