Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
singlestore-labs
GitHub Repository: singlestore-labs/singlestoredb-python
Path: blob/main/singlestoredb/docstring/numpydoc.py
469 views
1
"""Numpydoc-style docstring parsing.
2
3
:see: https://numpydoc.readthedocs.io/en/latest/format.html
4
"""
5
import inspect
6
import itertools
7
import re
8
import typing as T
9
from abc import abstractmethod
10
from textwrap import dedent
11
12
from .common import Docstring
13
from .common import DocstringDeprecated
14
from .common import DocstringExample
15
from .common import DocstringMeta
16
from .common import DocstringParam
17
from .common import DocstringRaises
18
from .common import DocstringReturns
19
from .common import DocstringStyle
20
from .common import RenderingStyle
21
22
23
def _pairwise(
24
iterable: T.Iterable[T.Any],
25
end: T.Optional[T.Any] = None,
26
) -> T.Iterable[T.Tuple[T.Any, T.Any]]:
27
left, right = itertools.tee(iterable)
28
next(right, None)
29
return itertools.zip_longest(left, right, fillvalue=end)
30
31
32
def _clean_str(string: str) -> T.Optional[str]:
33
string = string.strip()
34
if len(string) > 0:
35
return string
36
return None
37
38
39
# A key line is any line that starts with a non-whitespace character.
KV_REGEX = re.compile(r'^[^\s].*$', flags=re.MULTILINE)

# "name" or "name : type" on a parameter key line.
PARAM_KEY_REGEX = re.compile(r'^(?P<name>.*?)(?:\s*:\s*(?P<type>.*?))?$')

# A type ending in ", optional" or "(optional)"; ``type`` is what precedes it.
PARAM_OPTIONAL_REGEX = re.compile(r'(?P<type>.*?)(?:, optional|\(optional\))$')

# numpydoc format has no formal grammar for this,
# but we can make some educated guesses...
PARAM_DEFAULT_REGEX = re.compile(
    r'(?<!\S)[Dd]efault(?: is | = |: |s to |)\s*(?P<value>[\w\-\.]*\w)',
)

# "type" or "name : type" on a returns key line (the name part is optional).
RETURN_KEY_REGEX = re.compile(r'^(?:(?P<name>.*?)\s*:\s*)?(?P<type>.*?)$')
50
51
52
class Section:
    """Numpydoc section parser.

    :param title: section title. For most sections, this is a heading like
                  "Parameters" which appears on its own line, underlined by
                  hyphens ('-') on the following line.
    :param key: meta key string. In the parsed ``DocstringMeta`` instance this
                will be the first element of the ``args`` attribute list.
    """

    def __init__(self, title: str, key: str) -> None:
        self.title = title
        self.key = key

    @property
    def title_pattern(self) -> str:
        """Regular expression pattern matching this section's header.

        This pattern will match this instance's ``title`` attribute in
        an anonymous group. The title is matched literally and must be
        followed by a line of exactly ``len(title)`` hyphens.
        """
        dashes = '-' * len(self.title)
        # Escape the title so that regex metacharacters in a custom title
        # (e.g. "C++ Notes") are not interpreted as pattern syntax.
        return rf'^({re.escape(self.title)})\s*?\n{dashes}\s*$'

    def parse(self, text: str) -> T.Iterable[DocstringMeta]:
        """Parse ``DocstringMeta`` objects from the body of this section.

        The base implementation yields a single meta object whose
        description is the whole (stripped) section body.

        :param text: section body text. Should be cleaned with
                     ``inspect.cleandoc`` before parsing.
        """
        yield DocstringMeta([self.key], description=_clean_str(text))
83
84
85
class _KVSection(Section):
    """Shared parser for numpydoc sections written as key/value items.

    Such a section body looks like this:
        key
            value
        key2 : type
            values can also span...
            ... multiple lines
    """

    @abstractmethod
    def _parse_item(self, key: str, value: str) -> DocstringMeta:
        """Build the meta object for one item of the section.

        :param key: the unindented key line of the item.
        :param value: the cleaned text that follows the key line.
        """
        return DocstringMeta(args=[key], description=_clean_str(value))

    def parse(self, text: str) -> T.Iterable[DocstringMeta]:
        """Yield one parsed item per key line found in ``text``.

        :param text: section body text.
        """
        key_lines = KV_REGEX.finditer(text)
        for current, following in _pairwise(key_lines):
            # An item's value runs from the end of its key line up to the
            # next key line, or to the end of the text for the last item.
            stop = None if following is None else following.start()
            body = text[current.end():stop]
            yield self._parse_item(
                key=current.group(), value=inspect.cleandoc(body),
            )
108
109
110
class _SphinxSection(Section):
    """Base parser for numpydoc sections with sphinx-style syntax.

    E.g. sections that look like this:
        .. title:: something
            possibly over multiple lines
    """

    @property
    def title_pattern(self) -> str:
        """Regular expression pattern matching a ``.. title::`` directive.

        The title is captured in an anonymous group and matched literally.
        """
        # Escape the title so regex metacharacters in a custom directive
        # name are not interpreted as pattern syntax.
        return rf'^\.\.\s*({re.escape(self.title)})\s*::'
121
122
123
class ParamSection(_KVSection):
    """Parser for parameter-like numpydoc sections.

    It handles any section whose body looks like this:
        arg_name
            arg_description
        arg_2 : type, optional
            descriptions can also span...
            ... multiple lines
    """

    def _parse_item(self, key: str, value: str) -> DocstringParam:
        """Turn one ``name : type`` key and its description into a param.

        :param key: the key line, ``name`` or ``name : type[, optional]``.
        :param value: the cleaned description text of the item.
        """
        arg_name = type_name = is_optional = None

        key_match = PARAM_KEY_REGEX.match(key)
        if key_match is not None:
            arg_name = key_match.group('name')
            type_name = key_match.group('type')
            if type_name is not None:
                # Optionality is only decided when a type was written at all.
                optional_match = PARAM_OPTIONAL_REGEX.match(type_name)
                is_optional = optional_match is not None
                if optional_match is not None:
                    type_name = optional_match.group('type')

        # The default value, if any, is guessed from the description text.
        default = None
        if value:
            default_match = PARAM_DEFAULT_REGEX.search(value)
            if default_match is not None:
                default = default_match.group('value')

        name = str(arg_name)
        return DocstringParam(
            args=[self.key, name],
            description=_clean_str(value),
            arg_name=name,
            type_name=type_name,
            is_optional=is_optional,
            default=default,
        )
162
163
164
class RaisesSection(_KVSection):
    """Parser for numpydoc sections listing raised exceptions.

    It handles any section whose body looks like this:
        ValueError
            A description of what might raise ValueError
    """

    def _parse_item(self, key: str, value: str) -> DocstringRaises:
        """Turn one exception key line and its description into a meta item.

        :param key: the key line, used as the exception type name.
        :param value: the cleaned description text of the item.
        """
        # An empty key carries no type information.
        type_name = key or None
        return DocstringRaises(
            args=[self.key, key],
            description=_clean_str(value),
            type_name=type_name,
        )
178
179
180
class ReturnsSection(_KVSection):
    """Parser for numpydoc sections describing returned values.

    It handles any section whose body looks like this:
        return_name : type
            A description of this returned value
        another_type
            Return names are optional, types are required
    """

    # Copied onto every parsed item; subclasses flip it for generators.
    is_generator = False

    def _parse_item(self, key: str, value: str) -> DocstringReturns:
        """Turn one ``[name : ]type`` key and its description into a return.

        :param key: the key line, ``type`` or ``name : type``.
        :param value: the cleaned description text of the item.
        """
        key_match = RETURN_KEY_REGEX.match(key)
        if key_match is None:
            return_name = type_name = None
        else:
            return_name = key_match.group('name')
            type_name = key_match.group('type')

        return DocstringReturns(
            args=[self.key],
            description=_clean_str(value),
            type_name=type_name,
            is_generator=self.is_generator,
            return_name=return_name,
        )
208
209
210
class YieldsSection(ReturnsSection):
    """Parser for the numpydoc "yields" sections of generators."""

    # Same parsing as ``ReturnsSection``; items are flagged as generator output.
    is_generator = True
214
215
216
class DeprecationSection(_SphinxSection):
    """Parser for the numpydoc "deprecation warning" directive."""

    def parse(self, text: str) -> T.Iterable[DocstringDeprecated]:
        """Yield the single deprecation entry described by ``text``.

        The first line of ``text`` is taken as the version; any following
        lines form the description.

        :param text: the text that follows the directive marker.
        """
        version, newline, remainder = text.partition('\n')

        # Without a line break there is no description at all.
        desc = _clean_str(inspect.cleandoc(remainder)) if newline else None

        yield DocstringDeprecated(
            args=[self.key], description=desc, version=_clean_str(version),
        )
228
229
230
class ExamplesSection(Section):
    """Parser for numpydoc sections that hold example sessions.

    It handles any section whose body looks like this:
        >>> import numpy.matlib
        >>> np.matlib.empty((2, 2)) # filled with random data
        matrix([[ 6.76425276e-320, 9.79033856e-307], # random
                [ 7.39337286e-309, 3.22135945e-309]])
        >>> np.matlib.empty((2, 2), dtype=int)
        matrix([[ 6600475, 0], # random
                [ 6586976, 22740995]])
    """

    def parse(self, text: str) -> T.Iterable[DocstringExample]:
        """Yield one ``DocstringExample`` per snippet found in ``text``.

        Each example is made of the text leading up to a prompt, the
        prompt/continuation lines plus their output, and, for the last
        example only, any text left over after it.

        :param text: section body text. Should be cleaned with
                     ``inspect.cleandoc`` before parsing.
        """
        prompt = re.compile(r'^(>>>|sql>) ')
        code = re.compile(r'^(>>>|sql>|\.\.\.) ')

        lines = [raw.rstrip() for raw in dedent(text).strip().splitlines()]
        while lines:
            description_lines = []
            snippet_lines = []
            post_snippet_lines = []

            # Everything before the first prompt describes the snippet.
            while lines and not prompt.match(lines[0]):
                description_lines.append(lines.pop(0))

            # Prompt and continuation lines are the snippet's code.
            while lines and code.match(lines[0]):
                snippet_lines.append(lines.pop(0))

            # Output runs until a blank line or the next prompt.
            while lines and lines[0] and not prompt.match(lines[0]):
                snippet_lines.append(lines.pop(0))
            if lines and not lines[0]:
                # The blank line that ended the output is discarded.
                lines.pop(0)

            # Text left over with no further snippet becomes the
            # post description of this example.
            if not any(prompt.match(rest) for rest in lines):
                post_snippet_lines.extend(lines)
                lines = []

            snippet = '\n'.join(snippet_lines).strip() if snippet_lines else None
            yield DocstringExample(
                [self.key],
                snippet=snippet,
                description='\n'.join(description_lines).strip(),
                post_snippet='\n'.join(post_snippet_lines).strip(),
            )
291
292
293
# Sections recognised by default, as (parser class, title, meta key) rows.
# Several titles map to the same key so that spelling variants are accepted.
DEFAULT_SECTIONS = [
    section_class(title, key)
    for section_class, title, key in (
        (ParamSection, 'Parameters', 'param'),
        (ParamSection, 'Params', 'param'),
        (ParamSection, 'Arguments', 'param'),
        (ParamSection, 'Args', 'param'),
        (ParamSection, 'Other Parameters', 'other_param'),
        (ParamSection, 'Other Params', 'other_param'),
        (ParamSection, 'Other Arguments', 'other_param'),
        (ParamSection, 'Other Args', 'other_param'),
        (ParamSection, 'Receives', 'receives'),
        (ParamSection, 'Receive', 'receives'),
        (RaisesSection, 'Raises', 'raises'),
        (RaisesSection, 'Raise', 'raises'),
        (RaisesSection, 'Warns', 'warns'),
        (RaisesSection, 'Warn', 'warns'),
        (ParamSection, 'Attributes', 'attribute'),
        (ParamSection, 'Attribute', 'attribute'),
        (ReturnsSection, 'Returns', 'returns'),
        (ReturnsSection, 'Return', 'returns'),
        (YieldsSection, 'Yields', 'yields'),
        (YieldsSection, 'Yield', 'yields'),
        (ExamplesSection, 'Examples', 'examples'),
        (ExamplesSection, 'Example', 'examples'),
        (Section, 'Warnings', 'warnings'),
        (Section, 'Warning', 'warnings'),
        (Section, 'See Also', 'see_also'),
        (Section, 'Related', 'see_also'),
        (Section, 'Notes', 'notes'),
        (Section, 'Note', 'notes'),
        (Section, 'References', 'references'),
        (Section, 'Reference', 'references'),
        (DeprecationSection, 'deprecated', 'deprecation'),
    )
]
326
327
328
class NumpydocParser:
    """Parser for numpydoc-style docstrings."""

    def __init__(self, sections: T.Optional[T.List[Section]] = None):
        """Setup sections.

        :param sections: Recognized sections or None to defaults. An empty
                         list also selects the defaults.
        """
        sects = sections or DEFAULT_SECTIONS
        # Keyed by title so a matched header can be mapped back to its parser.
        self.sections = {s.title: s for s in sects}
        self._setup()

    def _setup(self) -> None:
        """Compile the regex that matches the header of any known section."""
        self.titles_re = re.compile(
            r'|'.join(s.title_pattern for s in self.sections.values()),
            flags=re.M,
        )

    def add_section(self, section: Section) -> None:
        """Add or replace a section.

        :param section: The new section.
        """

        self.sections[section.title] = section
        # The combined header regex must be rebuilt to include the new title.
        self._setup()

    def parse(self, text: T.Optional[str]) -> Docstring:
        """Parse the numpy-style docstring into its components.

        :param text: raw docstring text; ``None`` or an empty string gives
                     an empty result.
        :returns: parsed docstring
        """
        ret = Docstring(style=DocstringStyle.NUMPYDOC)
        if not text:
            return ret

        # Clean according to PEP-0257
        text = inspect.cleandoc(text)

        # Find first title and split on its position
        match = self.titles_re.search(text)
        if match:
            desc_chunk = text[:match.start()]
            meta_chunk = text[match.start():]
        else:
            desc_chunk = text
            meta_chunk = ''

        # Break description into short and long parts
        parts = desc_chunk.split('\n', 1)
        ret.short_description = parts[0] or None
        if len(parts) > 1:
            long_desc_chunk = parts[1] or ''
            ret.blank_after_short_description = long_desc_chunk.startswith(
                '\n',
            )
            ret.blank_after_long_description = long_desc_chunk.endswith('\n\n')
            ret.long_description = long_desc_chunk.strip() or None

        # ``match`` is always a real match object here: only ``nextmatch``
        # can be the ``None`` filler, for the last section.
        for match, nextmatch in _pairwise(self.titles_re.finditer(meta_chunk)):
            # Exactly one alternative of the combined pattern matched, so
            # exactly one group holds the title text.
            title = next(g for g in match.groups() if g is not None)
            factory = self.sections[title]

            # section chunk starts after the header,
            # ends at the start of the next header
            start = match.end()
            end = nextmatch.start() if nextmatch is not None else None
            ret.meta.extend(factory.parse(meta_chunk[start:end]))

        return ret
402
403
404
def parse(text: T.Optional[str]) -> Docstring:
    """Parse a numpy-style docstring using the default set of sections.

    :param text: raw docstring text.
    :returns: parsed docstring
    """
    parser = NumpydocParser()
    return parser.parse(text)
410
411
412
def compose(
    # pylint: disable=W0613
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    # NOTE(review): numpydoc bodies are conventionally indented by four
    # spaces; confirm that this one-space default is intended.
    indent: str = ' ',
) -> str:
    """Render a parsed docstring into docstring text.

    Sections are emitted in a fixed order: short description, deprecation
    notice, long description, Parameters, Attributes, Returns, Yields,
    Receives, Other Parameters, Raises, Warns, and finally every remaining
    meta item under a heading derived from its key.

    :param docstring: parsed docstring representation
    :param rendering_style: the style to render docstrings (currently not
                            used by this renderer)
    :param indent: the characters used as indentation in the docstring string
    :returns: docstring text
    """

    def process_one(
        one: T.Union[DocstringParam, DocstringReturns, DocstringRaises],
    ) -> None:
        # Render one item as a "name : type" head line followed by its
        # indented description lines.
        head: T.Optional[str] = None
        if isinstance(one, DocstringParam):
            head = one.arg_name
        elif isinstance(one, DocstringReturns):
            head = one.return_name

        if one.type_name and head:
            head += f' : {one.type_name}'
        elif one.type_name:
            head = one.type_name
        elif not head:
            head = ''

        if isinstance(one, DocstringParam) and one.is_optional:
            head += ', optional'

        if one.description:
            body = f'\n{indent}'.join([head] + one.description.splitlines())
            parts.append(body)
        else:
            parts.append(head)

    def process_sect(name: str, args: T.List[T.Any]) -> None:
        # Emit a blank line, the underlined heading and all items; empty
        # sections are skipped entirely.
        if args:
            parts.append('')
            parts.append(name)
            parts.append('-' * len(parts[-1]))
            for arg in args:
                process_one(arg)

    parts: T.List[str] = []
    if docstring.short_description:
        parts.append(docstring.short_description)
    if docstring.blank_after_short_description:
        parts.append('')

    if docstring.deprecation:
        first = '.. deprecated::'
        if docstring.deprecation.version:
            first += f' {docstring.deprecation.version}'
        if docstring.deprecation.description:
            rest = docstring.deprecation.description.splitlines()
        else:
            rest = []
        sep = f'\n{indent}'
        parts.append(sep.join([first] + rest))

    if docstring.long_description:
        parts.append(docstring.long_description)
    if docstring.blank_after_long_description:
        parts.append('')

    process_sect(
        'Parameters',
        [item for item in docstring.params or [] if item.args[0] == 'param'],
    )

    process_sect(
        'Attributes',
        [
            item
            for item in docstring.params or []
            if item.args[0] == 'attribute'
        ],
    )

    process_sect(
        'Returns',
        [
            item
            for item in docstring.many_returns or []
            if not item.is_generator
        ],
    )

    process_sect(
        'Yields',
        [item for item in docstring.many_returns or [] if item.is_generator],
    )

    if docstring.returns and not docstring.many_returns:
        ret = docstring.returns
        # Pick the heading from the item's generator flag; testing ``ret``
        # itself would always select 'Yields' inside this branch.
        parts.append('Yields' if ret.is_generator else 'Returns')
        parts.append('-' * len(parts[-1]))
        process_one(ret)

    process_sect(
        'Receives',
        [
            item
            for item in docstring.params or []
            if item.args[0] == 'receives'
        ],
    )

    process_sect(
        'Other Parameters',
        [
            item
            for item in docstring.params or []
            if item.args[0] == 'other_param'
        ],
    )

    process_sect(
        'Raises',
        [item for item in docstring.raises or [] if item.args[0] == 'raises'],
    )

    process_sect(
        'Warns',
        [item for item in docstring.raises or [] if item.args[0] == 'warns'],
    )

    for meta in docstring.meta:
        if isinstance(
            meta,
            (
                DocstringDeprecated,
                DocstringParam,
                DocstringReturns,
                DocstringRaises,
            ),
        ):
            continue  # Already handled

        # Turn the key into a heading ('see_also' -> 'See Also') and size
        # the underline from the rendered heading so both always agree.
        heading = meta.args[0].replace('_', ' ').title()
        parts.append('')
        parts.append(heading)
        parts.append('-' * len(heading))

        if meta.description:
            parts.append(meta.description)

    return '\n'.join(parts)
563
564