Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/lib/python/abi/abi_regex.py
38186 views
1
#!/usr/bin/env python3
2
# xxpylint: disable=R0903
3
# Copyright(c) 2025: Mauro Carvalho Chehab <[email protected]>.
4
# SPDX-License-Identifier: GPL-2.0
5
6
"""
7
Convert ABI what into regular expressions
8
"""
9
10
import re
11
import sys
12
13
from pprint import pformat
14
15
from abi.abi_parser import AbiParser
16
from abi.helpers import AbiDebug
17
18
class AbiRegex(AbiParser):
    """
    Extends AbiParser to search ABI nodes with regular expressions.

    After parse_abi() runs, every "What:" entry that starts with /sys has
    been converted into a regular expression.  The compiled expressions are
    stored in self.regex_group, keyed by a path component ("subgroup"), so
    that get_regexes() only has to return a small subset of them for a
    given devnode.
    """

    # Escape only ASCII visible characters
    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"

    # Name of the catch-all subgroup used when no path component qualifies
    leave_others = "others"

    # Tuples with regular expressions to be compiled and replacement data.
    #
    # They are applied in order by parse_abi(), so the order matters: the
    # characters \xf4, \xf5, \xf6, \xf7 and \xff are temporary placeholders
    # that protect already-converted pieces from the later escaping steps
    # and are turned back into "[", "]", "\.", "+" and "\d+" further down.
    re_whats = [
        # Drop escape characters that might exist
        (re.compile("\\\\"), ""),

        # Temporarily escape dot characters
        (re.compile(r"\."), "\xf6"),

        # Temporarily change [0-9]+ type of patterns
        (re.compile(r"\[0\-9\]\+"), "\xff"),

        # Temporarily change [\d+-\d+] type of patterns
        (re.compile(r"\[0\-\d+\]"), "\xff"),
        (re.compile(r"\[0:\d+\]"), "\xff"),
        (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),

        # Temporarily change [0-9] type of patterns.
        # The group references must be written as "\\1" and "\\2": in a
        # non-raw string, "\1" and "\2" are octal escapes (chr(1), chr(2))
        # and the digits of the range would be lost.
        (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\\1-\\2\xf5"),

        # Handle multiple option patterns
        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),

        # Handle wildcards
        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
        (re.compile(r"/\*/"), "/.*/"),
        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),

        (re.compile(r"XX+"), "\\\\w\xf7"),
        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),

        # Recover [0-9] type of patterns
        (re.compile(r"\xf4"), "["),
        (re.compile(r"\xf5"), "]"),

        # Remove duplicated spaces
        (re.compile(r"\s+"), r" "),

        # Special case: drop comparison as in:
        #       What: foo = <something>
        # (this happens on a few IIO definitions)
        (re.compile(r"\s*\=.*$"), ""),

        # Escape all other symbols
        (re.compile(escape_symbols), r"\\\1"),
        (re.compile(r"\\\\"), r"\\"),
        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),

        (re.compile(r"\xff"), r"\\d+"),

        # Special case: IIO ABI which has a parenthesis.
        # The parenthesis in the pattern is a regex group, not a literal,
        # so everything that follows "sqrt" is replaced by "(.*)".
        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),

        # Simplify regexes with multiple .*
        # NOTE(review): runs of ".*" are dropped entirely rather than
        # collapsed into a single ".*" - confirm this is intended.
        (re.compile(r"(?:\.\*){2,}"), ""),

        # Recover dot characters
        (re.compile(r"\xf6"), "\\."),
        # Recover plus characters
        (re.compile(r"\xf7"), "+"),
    ]

    # Detects a converted expression that contains a numeric (\d) wildcard
    re_has_num = re.compile(r"\\d")

    # Symbol name after escape_chars that are considered a devnode basename
    re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")

    # List of popular group names to be skipped to minimize regex group size
    # Use AbiDebug.SUBGROUP_SIZE to detect those
    skip_names = {"devices", "hwmon"}

    def regex_append(self, what, new):
        """
        Get a search group for a subset of regular expressions.

        As ABI may have thousands of symbols, using a for to search all
        regular expressions is at least O(n^2). When there are wildcards,
        the complexity increases substantially, eventually becoming
        exponential.

        To avoid spending too much time on them, use a logic to split
        them into groups. The smaller the group, the better, as it would
        mean that searches will be confined to a small number of regular
        expressions.

        The conversion to a regex subset is tricky, as we need something
        that can be easily obtained from the sysfs symbol and from the
        regular expression. So, we need to discard nodes that have
        wildcards.

        If it can't obtain a subgroup, place the regular expression inside
        a special group (self.leave_others).

        Arguments:
            what: original "What:" string, used for messages only
            new:  regular expression (as a string) generated from ``what``

        An expression that fails to compile is reported with a warning and
        not stored.
        """

        # Walk the path from the leaf towards the root and pick the first
        # component that looks like a plain symbol name (no wildcards).
        for candidate in reversed(new.split("/")):
            if not candidate or candidate in self.skip_names:
                continue

            if self.re_symbol_name.match(candidate):
                search_group = candidate
                break
        else:
            # No usable component: fall back to the catch-all group, which
            # get_regexes() always includes in its result.
            search_group = self.leave_others

        if self.debug & AbiDebug.SUBGROUP_MAP:
            self.log.debug("%s: mapped as %s", what, search_group)

        # Compile before touching self.regex_group, so that an invalid
        # expression doesn't leave an empty subgroup behind.
        # re.error is used instead of re.PatternError, as the latter only
        # exists on Python >= 3.13 (where both names are the same class).
        try:
            compiled = re.compile(new)
        except re.error:
            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
                             " '%s'", what, new)
            return

        self.regex_group.setdefault(search_group, []).append(compiled)

        # Plain substring check against the raw search string
        if self.search_string and self.search_string in what:
            print(f"What: {what}")

    def get_regexes(self, what):
        """
        Given an ABI devnode, return a list of all regular expressions that
        may match it, based on the sub-groups created by regex_append().

        The path components of ``what`` are checked from the leaf to the
        root, followed by the catch-all group (self.leave_others).
        """

        re_list = []

        # Same leaf-to-root order used by regex_append()
        groups = what.split("/")
        groups.reverse()
        groups.append(self.leave_others)

        for search_group in groups:
            if search_group in self.regex_group:
                re_list.extend(self.regex_group[search_group])

        return re_list

    def __init__(self, *args, **kwargs):
        """
        Override init method to get the search_string argument.

        The optional ``search_string`` keyword is consumed here and is not
        forwarded to the parent class. When it is set, it must be a valid
        regular expression: it is compiled into self.re_string.

        Raises ValueError if ``search_string`` is not a valid regular
        expression.
        """

        # Filled by parse_abi()
        self.regex_group = None
        self.re_string = None

        self.search_string = kwargs.pop("search_string", None)

        if self.search_string:
            # re.error works on every Python version; re.PatternError
            # is only available on Python >= 3.13.
            try:
                self.re_string = re.compile(self.search_string)
            except re.error as e:
                msg = f"{self.search_string} is not a valid regular expression"
                raise ValueError(msg) from e

        super().__init__(*args, **kwargs)

    def parse_abi(self, *args, **kwargs):
        """
        Parse the ABI using the parent class and convert each "What:" that
        starts with /sys into a regular expression.

        For every entry of self.data whose type is not "File", the
        generated expressions are stored as strings at its "regex" key and,
        compiled, inside self.regex_group via regex_append().

        Raises re.error if one of the re_whats substitutions fails.
        """

        super().parse_abi(*args, **kwargs)

        self.regex_group = {}

        print("Converting ABI What fields into regexes...", file=sys.stderr)

        # Sort by key to make the conversion order (and the debug output)
        # reproducible
        for _, v in sorted(self.data.items(), key=lambda item: item[0]):
            if v.get("type") == "File":
                continue

            v["regex"] = []

            for what in v.get("what", []):
                if not what.startswith("/sys"):
                    continue

                new = what
                for r, s in self.re_whats:
                    try:
                        new = r.sub(s, new)
                    except re.error as e:
                        # Help debugging troubles with new regexes
                        raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e

                v["regex"].append(new)

                if self.debug & AbiDebug.REGEX:
                    self.log.debug("%-90s <== %s", new, what)

                # Store regex into a subgroup to speedup searches
                self.regex_append(what, new)

        if self.debug & AbiDebug.SUBGROUP_DICT:
            self.log.debug("%s", pformat(self.regex_group))

        if self.debug & AbiDebug.SUBGROUP_SIZE:
            biggest_keys = sorted(self.regex_group.keys(),
                                  key=lambda k: len(self.regex_group[k]),
                                  reverse=True)

            print("Top regex subgroups:", file=sys.stderr)
            for k in biggest_keys[:10]:
                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)
235
236