CoCalc -- numpydoc.py

GitHub Repository: singlestore-labs/singlestoredb-python
Path: blob/main/singlestoredb/docstring/numpydoc.py
469 views
1
"""Numpydoc-style docstring parsing.
2

3
:see: https://numpydoc.readthedocs.io/en/latest/format.html
4
"""
5
import inspect
6
import itertools
7
import re
8
import typing as T
9
from abc import abstractmethod
10
from textwrap import dedent
11

12
from .common import Docstring
13
from .common import DocstringDeprecated
14
from .common import DocstringExample
15
from .common import DocstringMeta
16
from .common import DocstringParam
17
from .common import DocstringRaises
18
from .common import DocstringReturns
19
from .common import DocstringStyle
20
from .common import RenderingStyle
21

22

23
def _pairwise(
24
    iterable: T.Iterable[T.Any],
25
    end: T.Optional[T.Any] = None,
26
) -> T.Iterable[T.Tuple[T.Any, T.Any]]:
27
    left, right = itertools.tee(iterable)
28
    next(right, None)
29
    return itertools.zip_longest(left, right, fillvalue=end)
30

31

32
def _clean_str(string: str) -> T.Optional[str]:
33
    string = string.strip()
34
    if len(string) > 0:
35
        return string
36
    return None
37

38

39
# Matches each whole line that starts with a non-whitespace character;
# key-value sections treat those lines as keys.
KV_REGEX = re.compile(r'^[^\s].*$', flags=re.MULTILINE)

# Splits a key such as ``name : type`` into ``name`` and an optional ``type``.
PARAM_KEY_REGEX = re.compile(
    r'^(?P<name>.*?)(?:\s*:\s*(?P<type>.*?))?$',
)

# Detects a trailing ``, optional`` / ``(optional)`` marker and captures
# the type text in front of it.
PARAM_OPTIONAL_REGEX = re.compile(
    r'(?P<type>.*?)(?:, optional|\(optional\))$',
)

# numpydoc format has no formal grammar for this,
# but we can make some educated guesses...
PARAM_DEFAULT_REGEX = re.compile(
    r'(?<!\S)[Dd]efault(?: is | = |: |s to |)\s*(?P<value>[\w\-\.]*\w)',
)

# Splits a returns key into an optional ``name`` and the ``type``.
RETURN_KEY_REGEX = re.compile(
    r'^(?:(?P<name>.*?)\s*:\s*)?(?P<type>.*?)$',
)
50

51

52
class Section:
    """Numpydoc section parser.

    :param title: section title. For most sections, this is a heading like
                  "Parameters" which appears on its own line, underlined by
                  hyphens ('-') on the following line.
    :param key: meta key string. In the parsed ``DocstringMeta`` instance this
                will be the first element of the ``args`` attribute list.
    """

    def __init__(self, title: str, key: str) -> None:
        self.title = title
        self.key = key

    @property
    def title_pattern(self) -> str:
        """Regular expression pattern matching this section's header.

        This pattern will match this instance's ``title`` attribute in
        an anonymous group, followed on the next line by exactly as many
        hyphens as the title has characters.
        """
        dashes = '-' * len(self.title)
        # Escape the title so it is matched literally: regex metacharacters
        # in a custom title must not alter the pattern or add extra groups.
        return rf'^({re.escape(self.title)})\s*?\n{dashes}\s*$'

    def parse(self, text: str) -> T.Iterable[DocstringMeta]:
        """Parse ``DocstringMeta`` objects from the body of this section.

        The base implementation yields a single meta object whose
        description is the whole (whitespace-stripped) section body.

        :param text: section body text. Should be cleaned with
                     ``inspect.cleandoc`` before parsing.
        """
        yield DocstringMeta([self.key], description=_clean_str(text))
83

84

85
class _KVSection(Section):
    """Base parser for numpydoc sections with key-value syntax.

    E.g. sections that look like this:
        key
            value
        key2 : type
            values can also span...
            ... multiple lines
    """

    @abstractmethod
    def _parse_item(self, key: str, value: str) -> DocstringMeta:
        """Build the meta object for a single key/value entry.

        :param key: the unindented key line.
        :param value: the cleaned text that followed the key line.
        """
        description = _clean_str(value)
        return DocstringMeta(args=[key], description=description)

    def parse(self, text: str) -> T.Iterable[DocstringMeta]:
        """Yield one parsed item per key line found in ``text``.

        :param text: section body text.
        """
        key_lines = list(KV_REGEX.finditer(text))
        # A value ends where the next key line begins; the last value runs
        # to the end of the text (slice bound ``None``).
        stops: T.List[T.Optional[int]] = [m.start() for m in key_lines[1:]]
        stops.append(None)
        for key_line, stop in zip(key_lines, stops):
            raw_value = text[key_line.end():stop]
            yield self._parse_item(
                key=key_line.group(), value=inspect.cleandoc(raw_value),
            )
108

109

110
class _SphinxSection(Section):
    """Base parser for numpydoc sections with sphinx-style syntax.

    E.g. sections that look like this:
        .. title:: something
            possibly over multiple lines
    """

    @property
    def title_pattern(self) -> str:
        """Regular expression pattern matching a ``.. title::`` directive.

        The title is captured in an anonymous group.
        """
        # Escape the title so it is matched literally even when it contains
        # regex metacharacters.
        return rf'^\.\.\s*({re.escape(self.title)})\s*::'
121

122

123
class ParamSection(_KVSection):
    """Parser for numpydoc parameter sections.

    E.g. any section that looks like this:
        arg_name
            arg_description
        arg_2 : type, optional
            descriptions can also span...
            ... multiple lines
    """

    def _parse_item(self, key: str, value: str) -> DocstringParam:
        """Turn one ``name : type`` key and its description into a param.

        :param key: the key line, e.g. ``arg_2 : type, optional``.
        :param value: the description text belonging to the key.
        """
        arg_name: T.Optional[str] = None
        type_name: T.Optional[str] = None
        is_optional: T.Optional[bool] = None

        key_match = PARAM_KEY_REGEX.match(key)
        if key_match is not None:
            arg_name, type_name = key_match.group('name', 'type')
            if type_name is not None:
                optional_match = PARAM_OPTIONAL_REGEX.match(type_name)
                is_optional = optional_match is not None
                if optional_match is not None:
                    # Drop the "optional" marker from the reported type.
                    type_name = optional_match.group('type')

        # A default is only guessed from a non-empty description.
        default_match = PARAM_DEFAULT_REGEX.search(value) if value else None
        default = None
        if default_match is not None:
            default = default_match.group('value')

        name_text = str(arg_name)
        return DocstringParam(
            args=[self.key, name_text],
            description=_clean_str(value),
            arg_name=name_text,
            type_name=type_name,
            is_optional=is_optional,
            default=default,
        )
162

163

164
class RaisesSection(_KVSection):
    """Parser for numpydoc raises sections.

    E.g. any section that looks like this:
        ValueError
            A description of what might raise ValueError
    """

    def _parse_item(self, key: str, value: str) -> DocstringRaises:
        """Build a raises entry whose type name is the key line.

        :param key: the key line, e.g. ``ValueError``.
        :param value: the description text belonging to the key.
        """
        # An empty key is reported as "no type" rather than an empty string.
        exception_type = key or None
        return DocstringRaises(
            args=[self.key, key],
            description=_clean_str(value),
            type_name=exception_type,
        )
178

179

180
class ReturnsSection(_KVSection):
    """Parser for numpydoc returns sections.

    E.g. any section that looks like this:
        return_name : type
            A description of this returned value
        another_type
            Return names are optional, types are required
    """

    # Copied onto every parsed item; subclasses override it.
    is_generator = False

    def _parse_item(self, key: str, value: str) -> DocstringReturns:
        """Turn one ``[name :] type`` key and its description into a return.

        :param key: the key line, e.g. ``return_name : type``.
        :param value: the description text belonging to the key.
        """
        key_match = RETURN_KEY_REGEX.match(key)
        if key_match is None:
            return_name = type_name = None
        else:
            return_name, type_name = key_match.group('name', 'type')

        return DocstringReturns(
            args=[self.key],
            description=_clean_str(value),
            type_name=type_name,
            is_generator=self.is_generator,
            return_name=return_name,
        )
208

209

210
class YieldsSection(ReturnsSection):
    """Parser for numpydoc generator "yields" sections.

    Parses exactly like ``ReturnsSection`` but flags every item as
    belonging to a generator.
    """

    is_generator = True
214

215

216
class DeprecationSection(_SphinxSection):
    """Parser for numpydoc "deprecation warning" sections."""

    def parse(self, text: str) -> T.Iterable[DocstringDeprecated]:
        """Yield a single deprecation entry parsed from ``text``.

        The first line is taken as the version; anything after the first
        line break is the description.

        :param text: section body text following the directive header.
        """
        version_line, newline, remainder = text.partition('\n')

        # Without a line break there is no description at all.
        description = None
        if newline:
            description = _clean_str(inspect.cleandoc(remainder))

        yield DocstringDeprecated(
            args=[self.key],
            description=description,
            version=_clean_str(version_line),
        )
228

229

230
class ExamplesSection(Section):
    """Parser for numpydoc examples sections.

    E.g. any section that looks like this:
        >>> import numpy.matlib
        >>> np.matlib.empty((2, 2))    # filled with random data
        matrix([[  6.76425276e-320,   9.79033856e-307], # random
                [  7.39337286e-309,   3.22135945e-309]])
        >>> np.matlib.empty((2, 2), dtype=int)
        matrix([[ 6600475,        0], # random
                [ 6586976, 22740995]])
    """

    def parse(self, text: str) -> T.Iterable[DocstringExample]:
        """Parse ``DocstringExample`` objects from the body of this section.

        Each yielded example consists of the free text before a prompt
        line, the prompt/continuation lines plus the output lines that
        follow them, and, for the last example only, any remaining text.

        :param text: section body text. Should be cleaned with
                     ``inspect.cleandoc`` before parsing.
        """
        prompt_re = re.compile(r'^(>>>|sql>) ')
        code_re = re.compile(r'^(>>>|sql>|\.\.\.) ')

        pending = [x.rstrip() for x in dedent(text).strip().splitlines()]
        while pending:
            # Free text up to the first prompt line is the description.
            description_lines = []
            while pending and not prompt_re.match(pending[0]):
                description_lines.append(pending.pop(0))

            # Prompt and continuation lines make up the code.
            snippet_lines = []
            while pending and code_re.match(pending[0]):
                snippet_lines.append(pending.pop(0))

            # Output runs until a blank line (consumed) or the next prompt.
            while pending:
                current = pending[0]
                if not current:
                    pending.pop(0)
                    break
                if prompt_re.match(current):
                    break
                snippet_lines.append(pending.pop(0))

            # When no further snippet follows, the rest of the text is
            # attached to this example as its post description.
            post_snippet_lines = []
            if not any(prompt_re.match(x) for x in pending):
                post_snippet_lines = pending
                pending = []

            snippet = None
            if snippet_lines:
                snippet = '\n'.join(snippet_lines).strip()

            yield DocstringExample(
                [self.key],
                snippet=snippet,
                description='\n'.join(description_lines).strip(),
                post_snippet='\n'.join(post_snippet_lines).strip(),
            )
291

292

293
# Sections recognised by default. Each group lists the accepted header
# spellings for one meta key; the order of entries is preserved.
DEFAULT_SECTIONS: T.List[Section] = [
    *(
        ParamSection(title, 'param')
        for title in ('Parameters', 'Params', 'Arguments', 'Args')
    ),
    *(
        ParamSection(title, 'other_param')
        for title in (
            'Other Parameters',
            'Other Params',
            'Other Arguments',
            'Other Args',
        )
    ),
    *(ParamSection(title, 'receives') for title in ('Receives', 'Receive')),
    *(RaisesSection(title, 'raises') for title in ('Raises', 'Raise')),
    *(RaisesSection(title, 'warns') for title in ('Warns', 'Warn')),
    *(
        ParamSection(title, 'attribute')
        for title in ('Attributes', 'Attribute')
    ),
    *(ReturnsSection(title, 'returns') for title in ('Returns', 'Return')),
    *(YieldsSection(title, 'yields') for title in ('Yields', 'Yield')),
    *(
        ExamplesSection(title, 'examples')
        for title in ('Examples', 'Example')
    ),
    *(Section(title, 'warnings') for title in ('Warnings', 'Warning')),
    *(Section(title, 'see_also') for title in ('See Also', 'Related')),
    *(Section(title, 'notes') for title in ('Notes', 'Note')),
    *(Section(title, 'references') for title in ('References', 'Reference')),
    DeprecationSection('deprecated', 'deprecation'),
]
326

327

328
class NumpydocParser:
    """Parser for numpydoc-style docstrings."""

    def __init__(self, sections: T.Optional[T.List[Section]] = None):
        """Setup sections.

        :param sections: Recognized sections; ``None`` (or an empty list)
                         selects ``DEFAULT_SECTIONS``.
        """
        chosen = sections or DEFAULT_SECTIONS
        self.sections = {section.title: section for section in chosen}
        self._setup()

    def _setup(self) -> None:
        """Rebuild the combined header regex from the registered sections."""
        alternatives = [
            section.title_pattern for section in self.sections.values()
        ]
        self.titles_re = re.compile(r'|'.join(alternatives), flags=re.M)

    def add_section(self, section: Section) -> None:
        """Add or replace a section.

        A section with the same title as an existing one replaces it.

        :param section: The new section.
        """
        self.sections[section.title] = section
        self._setup()

    def parse(self, text: T.Optional[str]) -> Docstring:
        """Parse the numpy-style docstring into its components.

        :param text: raw docstring text; ``None`` or an empty string gives
                     an otherwise empty result.
        :returns: parsed docstring
        """
        ret = Docstring(style=DocstringStyle.NUMPYDOC)
        if not text:
            return ret

        # Clean according to PEP-0257
        text = inspect.cleandoc(text)

        # Everything before the first section header is description,
        # everything from it onwards is section data.
        first_header = self.titles_re.search(text)
        boundary = first_header.start() if first_header else len(text)
        desc_chunk, meta_chunk = text[:boundary], text[boundary:]

        # The first line is the short description, the rest the long one.
        short, newline, rest = desc_chunk.partition('\n')
        ret.short_description = short or None
        if newline:
            ret.blank_after_short_description = rest.startswith('\n')
            ret.blank_after_long_description = rest.endswith('\n\n')
            ret.long_description = rest.strip() or None

        headers = self.titles_re.finditer(meta_chunk)
        for match, nextmatch in _pairwise(headers):
            if not match:
                raise ValueError(
                    'No section title found in docstring: %s' % meta_chunk,
                )
            # Only the alternative that matched has a non-None group.
            title = next(g for g in match.groups() if g is not None)
            factory = self.sections[title]

            # A section body runs from the end of its header to the start
            # of the next header, or to the end of the text.
            body_start = match.end()
            body_end = None if nextmatch is None else nextmatch.start()
            ret.meta.extend(factory.parse(meta_chunk[body_start:body_end]))

        return ret
402

403

404
def parse(text: T.Optional[str]) -> Docstring:
    """Parse the numpy-style docstring into its components.

    A fresh ``NumpydocParser`` with its default sections is used.

    :param text: raw docstring text, may be ``None``.
    :returns: parsed docstring
    """
    parser = NumpydocParser()
    return parser.parse(text)
410

411

412
def compose(
    # pylint: disable=W0613
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    indent: str = '    ',
) -> str:
    """Render a parsed docstring into docstring text.

    :param docstring: parsed docstring representation
    :param rendering_style: the style to render docstrings (not used by
                            this function's body)
    :param indent: the characters used as indentation in the docstring string
    :returns: docstring text
    """

    def process_one(
        one: T.Union[DocstringParam, DocstringReturns, DocstringRaises],
    ) -> None:
        # Render one entry as "name : type[, optional]" followed by its
        # indented description lines.
        head: T.Optional[str] = None
        if isinstance(one, DocstringParam):
            head = one.arg_name
        elif isinstance(one, DocstringReturns):
            head = one.return_name

        if one.type_name and head:
            head += f' : {one.type_name}'
        elif one.type_name:
            head = one.type_name
        elif not head:
            head = ''

        if isinstance(one, DocstringParam) and one.is_optional:
            head += ', optional'

        if one.description:
            body = f'\n{indent}'.join([head] + one.description.splitlines())
            parts.append(body)
        else:
            parts.append(head)

    def process_sect(name: str, args: T.List[T.Any]) -> None:
        # Emit a blank line, the underlined section header and every entry;
        # empty sections are skipped entirely.
        if args:
            parts.append('')
            parts.append(name)
            parts.append('-' * len(parts[-1]))
            for arg in args:
                process_one(arg)

    parts: T.List[str] = []
    if docstring.short_description:
        parts.append(docstring.short_description)
    if docstring.blank_after_short_description:
        parts.append('')

    if docstring.deprecation:
        first = '.. deprecated::'
        if docstring.deprecation.version:
            first += f' {docstring.deprecation.version}'
        if docstring.deprecation.description:
            rest = docstring.deprecation.description.splitlines()
        else:
            rest = []
        sep = f'\n{indent}'
        parts.append(sep.join([first] + rest))

    if docstring.long_description:
        parts.append(docstring.long_description)
    if docstring.blank_after_long_description:
        parts.append('')

    process_sect(
        'Parameters',
        [item for item in docstring.params or [] if item.args[0] == 'param'],
    )

    process_sect(
        'Attributes',
        [
            item
            for item in docstring.params or []
            if item.args[0] == 'attribute'
        ],
    )

    process_sect(
        'Returns',
        [
            item
            for item in docstring.many_returns or []
            if not item.is_generator
        ],
    )

    process_sect(
        'Yields',
        [item for item in docstring.many_returns or [] if item.is_generator],
    )

    if docstring.returns and not docstring.many_returns:
        ret = docstring.returns
        # Pick the header from the generator flag; ``ret`` itself is always
        # truthy inside this branch, so testing it would always say "Yields".
        parts.append('Yields' if ret.is_generator else 'Returns')
        parts.append('-' * len(parts[-1]))
        process_one(ret)

    process_sect(
        'Receives',
        [
            item
            for item in docstring.params or []
            if item.args[0] == 'receives'
        ],
    )

    process_sect(
        'Other Parameters',
        [
            item
            for item in docstring.params or []
            if item.args[0] == 'other_param'
        ],
    )

    process_sect(
        'Raises',
        [item for item in docstring.raises or [] if item.args[0] == 'raises'],
    )

    process_sect(
        'Warns',
        [item for item in docstring.raises or [] if item.args[0] == 'warns'],
    )

    for meta in docstring.meta:
        if isinstance(
            meta,
            (
                DocstringDeprecated,
                DocstringParam,
                DocstringReturns,
                DocstringRaises,
            ),
        ):
            continue  # Already handled

        # Turn a key such as "see_also" into the header "See Also". The
        # underscore becomes a space (not nothing) so the title keeps the
        # same length as its underline.
        title = meta.args[0].replace('_', ' ').title()
        parts.append('')
        parts.append(title)
        parts.append('-' * len(title))

        # NOTE(review): only ``description`` is rendered here; any other
        # fields a meta object carries are not written out - confirm intended.
        if meta.description:
            parts.append(meta.description)

    return '\n'.join(parts)
563

564
Product

Resources

Company