Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/lib/python/kdoc/kdoc_re.py
38186 views
1
#!/usr/bin/env python3
2
# SPDX-License-Identifier: GPL-2.0
3
# Copyright(c) 2025: Mauro Carvalho Chehab <[email protected]>.
4
5
"""
6
Regular expression ancillary classes.
7
8
Those help caching regular expressions and do matching for kernel-doc.
9
"""
10
11
import re
12
13
# Local cache for regular expressions
14
re_cache = {}
15
16
17
class KernRe:
    """
    Helper class to simplify regex declaration and usage.

    It calls re.compile for a given pattern. It also allows adding
    regular expressions and define sub at class init time.

    Regular expressions can be cached via an argument, helping to speedup
    searches.
    """

    def _add_regex(self, string, flags):
        """
        Adds a new regex or reuses it from the cache.

        The cache is keyed by (pattern, flags): the same pattern compiled
        with different flags produces a different regex object, so caching
        by pattern alone could hand back a regex compiled with the wrong
        flags.
        """
        key = (string, flags)

        self.regex = re_cache.get(key, None)
        if not self.regex:
            self.regex = re.compile(string, flags=flags)

            if self.cache:
                re_cache[key] = self.regex

    def __init__(self, string, cache=True, flags=0):
        """
        Compile a regular expression and initialize internal vars.

        :param string: regular expression pattern
        :param cache:  if True, store the compiled regex in the module
                       cache for reuse by later instances
        :param flags:  flags forwarded to re.compile()
        """

        self.cache = cache
        self.last_match = None

        self._add_regex(string, flags)

    def __str__(self):
        """
        Return the regular expression pattern.
        """
        return self.regex.pattern

    def __add__(self, other):
        """
        Allows adding two regular expressions into one.

        The result caches if either operand caches, and carries the
        union of both operands' compile flags.
        """

        return KernRe(str(self) + str(other), cache=self.cache or other.cache,
                      flags=self.regex.flags | other.regex.flags)

    def match(self, string):
        """
        Handles a re.match storing its results
        """

        self.last_match = self.regex.match(string)
        return self.last_match

    def search(self, string):
        """
        Handles a re.search storing its results
        """

        self.last_match = self.regex.search(string)
        return self.last_match

    def findall(self, string):
        """
        Alias to re.findall
        """

        return self.regex.findall(string)

    def split(self, string):
        """
        Alias to re.split
        """

        return self.regex.split(string)

    def sub(self, sub, string, count=0):
        """
        Alias to re.sub
        """

        return self.regex.sub(sub, string, count=count)

    def group(self, num):
        """
        Returns the group results of the last match
        """

        return self.last_match.group(num)
105
106
107
class NestedMatch:
    """
    Finding nested delimiters is hard with regular expressions. It is
    even harder on Python with its normal re module, as there are several
    advanced regular expressions that are missing.

    This is the case of this pattern:

            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'

    which is used to properly match open/close parentheses of the
    string search STRUCT_GROUP(),

    Add a class that counts pairs of delimiters, using it to match and
    replace nested expressions.

    The original approach was suggested by:
        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex

    Although I re-implemented it to make it more generic and match 3 types
    of delimiters. The logic checks if delimiters are paired. If not, it
    will ignore the search string.
    """

    # TODO: make NestedMatch handle multiple match groups
    #
    # Right now, regular expressions to match it are defined only up to
    # the start delimiter, e.g.:
    #
    #       \bSTRUCT_GROUP\(
    #
    # is similar to: STRUCT_GROUP\((.*)\)
    # except that the content inside the match group is delimiter-aligned.
    #
    # The content inside parentheses is converted into a single replace
    # group (e.g. r`\1').
    #
    # It would be nice to change such definition to support multiple
    # match groups, allowing a regex equivalent to:
    #
    #   FOO\((.*), (.*), (.*)\)
    #
    # it is probably easier to define it not as a regular expression, but
    # with some lexical definition like:
    #
    #   FOO(arg1, arg2, arg3)

    # Maps each supported open delimiter to its closing counterpart
    DELIMITER_PAIRS = {
        '{': '}',
        '(': ')',
        '[': ']',
    }

    # Matches any single delimiter character, open or close
    RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')

    def _search(self, regex, line):
        """
        Finds paired blocks for a regex that ends with a delimiter.

        Yields (start, offset, end) tuples: start is where the regex
        matched, offset is the position right after the open delimiter,
        and end is the position right after the matching close delimiter.

        The suggestion of using finditer to match pairs came from:
        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
        but I ended using a different implementation to align all three types
        of delimiters and seek for an initial regular expression.

        The algorithm seeks for open/close paired delimiters and places them
        into a stack, yielding a start/stop position of each match when the
        stack is zeroed.

        The algorithm should work fine for properly paired lines, but will
        silently ignore end delimiters that precede a start delimiter.
        This should be OK for kernel-doc parser, as unaligned delimiters
        would cause compilation errors. So, we don't need to raise exceptions
        to cover such issues.
        """

        stack = []

        for match_re in regex.finditer(line):
            start = match_re.start()
            offset = match_re.end()

            # The regex is expected to end at an open delimiter; if the
            # last matched char isn't one, skip this occurrence
            d = line[offset - 1]
            if d not in self.DELIMITER_PAIRS:
                continue

            end = self.DELIMITER_PAIRS[d]
            stack.append(end)

            for match in self.RE_DELIM.finditer(line[offset:]):
                pos = match.start() + offset

                d = line[pos]

                if d in self.DELIMITER_PAIRS:
                    end = self.DELIMITER_PAIRS[d]

                    stack.append(end)
                    continue

                # Does the end delimiter match what is expected?
                if stack and d == stack[-1]:
                    stack.pop()

                    if not stack:
                        yield start, offset, pos + 1
                        break

    def search(self, regex, line):
        """
        This is similar to re.search:

        It matches a regex that it is followed by a delimiter,
        returning occurrences only if all delimiters are paired.
        """

        for t in self._search(regex, line):

            yield line[t[0]:t[2]]

    def sub(self, regex, sub, line, count=0):
        """
        This is similar to re.sub:

        It matches a regex that it is followed by a delimiter,
        replacing occurrences only if all delimiters are paired.

        if r'\1' is used, it works just like re: it places there the
        matched paired data with the delimiter stripped.

        If count is different than zero, it will replace at most count
        items.
        """
        out = ""

        cur_pos = 0
        n = 0

        for start, end, pos in self._search(regex, line):
            out += line[cur_pos:start]

            # Value, ignoring start/end delimiters
            value = line[end:pos - 1]

            # replaces \1 at the sub string, if \1 is used there
            new_sub = sub
            new_sub = new_sub.replace(r'\1', value)

            out += new_sub

            # Drop end ';' if any. Guard against the close delimiter
            # being the very last char of line, which would otherwise
            # raise IndexError here.
            if pos < len(line) and line[pos] == ';':
                pos += 1

            cur_pos = pos
            n += 1

            # Stop after 'count' replacements (count == 0 means replace
            # all). The previous test (count >= n) broke out right after
            # the first replacement for any count > 1.
            if count and n >= count:
                break

        # Append the remaining string
        out += line[cur_pos:]

        return out
271
272