CoCalc -- google.py

GitHub Repository: singlestore-labs/singlestoredb-python
Path: blob/main/singlestoredb/docstring/google.py
⁴⁶⁹ views
1
"""Google-style docstring parsing."""
2
import inspect
3
import re
4
import typing as T
5
from collections import namedtuple
6
from collections import OrderedDict
7
from enum import IntEnum
8

9
from .common import Docstring
10
from .common import DocstringExample
11
from .common import DocstringMeta
12
from .common import DocstringParam
13
from .common import DocstringRaises
14
from .common import DocstringReturns
15
from .common import DocstringStyle
16
from .common import EXAMPLES_KEYWORDS
17
from .common import PARAM_KEYWORDS
18
from .common import ParseError
19
from .common import RAISES_KEYWORDS
20
from .common import RenderingStyle
21
from .common import RETURNS_KEYWORDS
22
from .common import YIELDS_KEYWORDS
23

24

25
class SectionType(IntEnum):
    """Kinds of docstring section, distinguished by how the body is parsed."""

    # The whole section body is one free-form element (e.g. examples).
    SINGULAR = 0

    # The section body is a list of separate entries (e.g. params).
    MULTIPLE = 1

    # The section body may take either form (e.g. returns or yields).
    SINGULAR_OR_MULTIPLE = 2
36

37

38
class Section(namedtuple('SectionBase', ['title', 'key', 'type'])):
    """A docstring section.

    Fields:
        title: Heading text that introduces the section in a docstring.
        key: Keyword stored as the first ``args`` entry of the metadata
            built for this section.
        type: A ``SectionType`` value selecting how the body is parsed.
    """
40

41

42
# Matches a parameter spec written as ``name (type)``: group 1 is the name
# and group 2 is the text inside the parentheses (surrounding blanks removed).
GOOGLE_TYPED_ARG_REGEX = re.compile(r'\s*(.+?)\s*\(\s*(.*[^\s]+)\s*\)')
43
# Captures, as group 1, the value X from a description containing
# ``. Defaults to X.``. ``.`` does not cross newlines here, so when used with
# ``match`` the phrase has to sit on the first line of the description.
GOOGLE_ARG_DESC_REGEX = re.compile(r'.*\. Defaults to (.+)\.')
44
# Decides whether text starts like a ``spec: description`` entry: either a
# single whitespace-free, colon-free token followed by ``:``, or colon-free
# text ending in ``]:`` (e.g. a subscripted type such as ``List[int]:``).
MULTIPLE_PATTERN = re.compile(r'(\s*[^:\s]+:)|([^:]*\]:.*)')
45

46
# Sections recognized when a parser is created without an explicit list.
# Each row is (title, key, type); the order of rows is preserved.
DEFAULT_SECTIONS = [
    Section(*spec)
    for spec in (
        ('Arguments', 'param', SectionType.MULTIPLE),
        ('Args', 'param', SectionType.MULTIPLE),
        ('Parameters', 'param', SectionType.MULTIPLE),
        ('Params', 'param', SectionType.MULTIPLE),
        ('Raises', 'raises', SectionType.MULTIPLE),
        ('Exceptions', 'raises', SectionType.MULTIPLE),
        ('Except', 'raises', SectionType.MULTIPLE),
        ('Attributes', 'attribute', SectionType.MULTIPLE),
        ('Example', 'examples', SectionType.SINGULAR),
        ('Examples', 'examples', SectionType.SINGULAR),
        ('Returns', 'returns', SectionType.SINGULAR_OR_MULTIPLE),
        ('Yields', 'yields', SectionType.SINGULAR_OR_MULTIPLE),
    )
]
60

61

62
class GoogleParser:
    """Parser for Google-style docstrings.

    The parser keeps a mapping of section title -> ``Section`` and a compiled
    regular expression (``titles_re``) that finds lines consisting of one of
    those titles. ``parse`` uses that expression to split a docstring into a
    description and a series of section bodies.
    """

    def __init__(
        self, sections: T.Optional[T.List[Section]] = None, title_colon: bool = True,
    ):
        """Set up the recognized sections.

        :param sections: Recognized sections; ``None`` or an empty list
            selects ``DEFAULT_SECTIONS``.
        :param title_colon: Require a colon after each section title.
        """
        if not sections:
            sections = DEFAULT_SECTIONS
        self.sections = {s.title: s for s in sections}
        self.title_colon = title_colon
        self._setup()

    def _setup(self) -> None:
        """(Re)build ``titles_re`` from the current section titles.

        The expression matches a whole line made of a known title, the colon
        when ``title_colon`` is set, and optional trailing blanks. Group 1
        holds the matched title.
        """
        colon = ':' if self.title_colon else ''
        # Titles are literal text, so escape them: an unescaped title such as
        # 'C++ Notes' would be an invalid pattern and 'Notes (v2)' would not
        # match its own text.
        alternatives = '|'.join(f'({re.escape(t)})' for t in self.sections)
        self.titles_re = re.compile(
            '^(' + alternatives + ')' + colon + '[ \t\r\f\v]*$',
            flags=re.M,
        )

    def _build_meta(self, text: str, title: str) -> DocstringMeta:
        """Build one docstring element from the text of a section entry.

        :param text: docstring element text
        :param title: title of the section containing the element; must be a
            key of ``self.sections``
        :return: the element built by ``_build_single_meta`` or
            ``_build_multi_meta``
        :raises ParseError: if a ``spec: description`` entry is expected but
            ``text`` contains no colon
        """
        section = self.sections[title]

        # Free-form text: always for SINGULAR sections, and for
        # SINGULAR_OR_MULTIPLE sections whose text does not start like a spec.
        if (
            section.type == SectionType.SINGULAR_OR_MULTIPLE
            and not MULTIPLE_PATTERN.match(text)
        ) or section.type == SectionType.SINGULAR:
            return self._build_single_meta(section, text)

        if ':' not in text:
            raise ParseError(f'Expected a colon in {text!r}.')

        # Split spec and description at the first colon.
        before, desc = text.split(':', 1)

        if before and '\n' in before:
            # The spec spans several lines: dedent the continuation and glue
            # it directly onto the first line.
            first_line, rest = before.split('\n', 1)
            before = first_line + inspect.cleandoc(rest)

        if desc:
            # Drop the single space that conventionally follows the colon.
            desc = desc[1:] if desc[0] == ' ' else desc
            if '\n' in desc:
                # Dedent continuation lines but keep the line structure.
                first_line, rest = desc.split('\n', 1)
                desc = first_line + '\n' + inspect.cleandoc(rest)
            desc = desc.strip('\n')

        return self._build_multi_meta(section, before, desc)

    @staticmethod
    def _build_single_meta(section: Section, desc: str) -> DocstringMeta:
        """Build an element for a section whose body is free-form text.

        :param section: section the text belongs to; its ``key`` selects the
            element class
        :param desc: the section text, used as the description
        :return: the element for ``section.key``
        :raises ParseError: if ``section.key`` is a parameter keyword, since a
            parameter needs a name
        """
        if section.key in RETURNS_KEYWORDS | YIELDS_KEYWORDS:
            return DocstringReturns(
                args=[section.key],
                description=desc,
                type_name=None,
                is_generator=section.key in YIELDS_KEYWORDS,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(
                args=[section.key], description=desc, type_name=None,
            )
        if section.key in EXAMPLES_KEYWORDS:
            return DocstringExample(
                args=[section.key], snippet=None, description=desc,
            )
        if section.key in PARAM_KEYWORDS:
            raise ParseError('Expected parameter name.')
        return DocstringMeta(args=[section.key], description=desc)

    @staticmethod
    def _build_multi_meta(
        section: Section, before: str, desc: str,
    ) -> DocstringMeta:
        """Build an element from a ``spec: description`` entry.

        :param section: section the entry belongs to; its ``key`` selects the
            element class
        :param before: the spec, i.e. the text before the first colon
        :param desc: the description, i.e. the text after the first colon
        :return: the element for ``section.key``
        """
        if section.key in PARAM_KEYWORDS:
            match = GOOGLE_TYPED_ARG_REGEX.match(before)
            if match:
                # Spec looks like "name (type)"; the type may carry an
                # optional marker as a ", optional" or "?" suffix.
                arg_name, type_name = match.group(1, 2)
                if type_name.endswith(', optional'):
                    is_optional = True
                    type_name = type_name[:-len(', optional')]
                elif type_name.endswith('?'):
                    is_optional = True
                    type_name = type_name[:-1]
                else:
                    is_optional = False
            else:
                # No parenthesized type: the whole spec is the name and
                # optionality is unknown.
                arg_name, type_name = before, None
                is_optional = None

            match = GOOGLE_ARG_DESC_REGEX.match(desc)
            default = match.group(1) if match else None

            return DocstringParam(
                args=[section.key, before],
                description=desc,
                arg_name=arg_name,
                type_name=type_name,
                is_optional=is_optional,
                default=default,
            )
        if section.key in RETURNS_KEYWORDS | YIELDS_KEYWORDS:
            return DocstringReturns(
                args=[section.key, before],
                description=desc,
                type_name=before,
                is_generator=section.key in YIELDS_KEYWORDS,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(
                args=[section.key, before], description=desc, type_name=before,
            )
        return DocstringMeta(args=[section.key, before], description=desc)

    def add_section(self, section: Section) -> None:
        """Add or replace a section and rebuild the title expression.

        :param section: The new section; it replaces any existing section
            with the same title.
        """
        self.sections[section.title] = section
        self._setup()

    def parse(self, text: T.Optional[str]) -> Docstring:
        """Parse the Google-style docstring into its components.

        :param text: docstring text; ``None`` or an empty string yields an
            empty result
        :returns: parsed docstring
        :raises ParseError: if a MULTIPLE section has no entry at its base
            indentation, or an entry cannot be built
        """
        ret = Docstring(style=DocstringStyle.GOOGLE)
        if not text:
            return ret

        # Clean according to PEP-0257
        text = inspect.cleandoc(text)

        # Find first title and split on its position
        match = self.titles_re.search(text)
        if match:
            desc_chunk = text[:match.start()]
            meta_chunk = text[match.start():]
        else:
            desc_chunk = text
            meta_chunk = ''

        # Break description into short and long parts
        parts = desc_chunk.split('\n', 1)
        ret.short_description = parts[0] or None
        if len(parts) > 1:
            long_desc_chunk = parts[1] or ''
            ret.blank_after_short_description = long_desc_chunk.startswith(
                '\n',
            )
            ret.blank_after_long_description = long_desc_chunk.endswith('\n\n')
            ret.long_description = long_desc_chunk.strip() or None

        # Split by sections determined by titles: each body runs from the end
        # of its title line to the start of the next title (or end of text).
        matches = list(self.titles_re.finditer(meta_chunk))
        if not matches:
            return ret
        splits = [
            (current.end(), following.start())
            for current, following in zip(matches, matches[1:])
        ]
        splits.append((matches[-1].end(), len(meta_chunk)))

        # Keyed by title, so a repeated title keeps only its last body.
        chunks = OrderedDict()  # type: T.MutableMapping[str,str]
        for title_match, (start, end) in zip(matches, splits):
            title = title_match.group(1)
            if title not in self.sections:
                continue

            # Clear Any Unknown Meta
            # Ref: https://github.com/rr-/docstring_parser/issues/29
            # A non-indented line inside the body ends the section.
            meta_details = meta_chunk[start:end]
            unknown_meta = re.search(r'\n\S', meta_details)
            if unknown_meta is not None:
                meta_details = meta_details[: unknown_meta.start()]

            chunks[title] = meta_details.strip('\n')
        if not chunks:
            return ret

        # Add elements from each chunk
        for title, chunk in chunks.items():
            # Determine indent from the leading whitespace of the body.
            indent_match = re.search(r'^\s*', chunk)
            if not indent_match:
                raise ParseError(f'Can\'t infer indent from "{chunk}"')
            indent = indent_match.group()

            # Sections that may be singular are handed over as one dedented
            # element; _build_meta decides how to interpret the text.
            if self.sections[title].type in [
                SectionType.SINGULAR,
                SectionType.SINGULAR_OR_MULTIPLE,
            ]:
                part = inspect.cleandoc(chunk)
                ret.meta.append(self._build_meta(part, title))
                continue

            # Split based on lines which have exactly that indent; deeper
            # indented lines are continuations of the preceding entry.
            _re = '^' + indent + r'(?=\S)'
            c_matches = list(re.finditer(_re, chunk, flags=re.M))
            if not c_matches:
                raise ParseError(f'No specification for "{title}": "{chunk}"')
            c_splits = [
                (current.end(), following.start())
                for current, following in zip(c_matches, c_matches[1:])
            ]
            c_splits.append((c_matches[-1].end(), len(chunk)))
            for start, end in c_splits:
                part = chunk[start:end].strip('\n')
                ret.meta.append(self._build_meta(part, title))

        return ret
292

293

294
def parse(text: T.Optional[str]) -> Docstring:
    """Parse a Google-style docstring with a default ``GoogleParser``.

    :param text: docstring text, passed unchanged to ``GoogleParser.parse``
    :returns: parsed docstring
    """
    default_parser = GoogleParser()
    return default_parser.parse(text)
300

301

302
def compose(
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    indent: str = '    ',
) -> str:
    """Render a parsed docstring into docstring text.

    Output order: short description, long description, ``Args:``,
    ``Attributes:``, ``Returns:``, ``Yields:``, ``Raises:``, then one section
    per remaining metadata item. Trailing blank lines are removed.

    :param docstring: parsed docstring representation
    :param rendering_style: the style to render docstrings
    :param indent: the characters used as indentation in the docstring string
    :returns: docstring text
    """
    parts: T.List[str] = []

    def process_one(
        one: T.Union[DocstringParam, DocstringReturns, DocstringRaises],
    ) -> None:
        """Append the rendered ``name (type): description`` entry for *one*."""
        if isinstance(one, DocstringParam):
            head = one.arg_name or ''
        elif isinstance(one, DocstringReturns):
            head = one.return_name or ''
        else:
            head = ''

        # Optional params get a marker inside the type: "?" when compact,
        # ", optional" otherwise.
        optional = ''
        if isinstance(one, DocstringParam) and one.is_optional:
            if rendering_style == RenderingStyle.COMPACT:
                optional = '?'
            else:
                optional = ', optional'

        if one.type_name and head:
            head += f' ({one.type_name}{optional}):'
        elif one.type_name:
            head += f'{one.type_name}{optional}:'
        else:
            head += ':'
        head = indent + head

        continuation = f'\n{indent}{indent}'
        if one.description and rendering_style == RenderingStyle.EXPANDED:
            # Expanded: the description starts on the line after the head.
            parts.append(
                continuation.join([head] + one.description.splitlines()),
            )
        elif one.description:
            # Otherwise the first description line shares the head's line.
            first, *rest = one.description.splitlines()
            parts.append(continuation.join([head + ' ' + first] + rest))
        else:
            parts.append(head)

    def process_sect(name: str, args: T.List[T.Any]) -> None:
        """Append a titled section followed by a blank line, if non-empty."""
        if args:
            parts.append(name)
            for arg in args:
                process_one(arg)
            parts.append('')

    if docstring.short_description:
        parts.append(docstring.short_description)
    if docstring.blank_after_short_description:
        parts.append('')

    if docstring.long_description:
        parts.append(docstring.long_description)
    if docstring.blank_after_long_description:
        parts.append('')

    process_sect(
        'Args:', [p for p in docstring.params or [] if p.args[0] == 'param'],
    )

    process_sect(
        'Attributes:',
        [p for p in docstring.params or [] if p.args[0] == 'attribute'],
    )

    process_sect(
        'Returns:',
        [p for p in docstring.many_returns or [] if not p.is_generator],
    )

    process_sect(
        'Yields:', [p for p in docstring.many_returns or [] if p.is_generator],
    )

    process_sect('Raises:', docstring.raises or [])

    # Fallback for a docstring exposing ``returns`` but no ``many_returns``.
    # The title must follow ``is_generator`` (testing the object itself is
    # always true and would always produce "Yields:"), and the section is
    # rendered like every other one: Google style has no title underline.
    # NOTE(review): assumes ``docstring.returns`` has ``is_generator`` like
    # the items of ``many_returns`` -- confirm against the Docstring class.
    lone_return = docstring.returns
    if lone_return and not docstring.many_returns:
        process_sect(
            'Yields:' if lone_return.is_generator else 'Returns:',
            [lone_return],
        )

    for meta in docstring.meta:
        if isinstance(
            meta, (DocstringParam, DocstringReturns, DocstringRaises),
        ):
            continue  # Already handled
        # Section title derived from the meta keyword, e.g. 'examples' ->
        # 'Examples:'.
        parts.append(meta.args[0].replace('_', '').title() + ':')
        if meta.description:
            lines = [indent + m for m in meta.description.splitlines()]
            parts.append('\n'.join(lines))
        parts.append('')

    # Drop trailing blank entries so the text does not end in empty lines.
    while parts and not parts[-1]:
        parts.pop()

    return '\n'.join(parts)
413

414
Product

Resources

Company