Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
singlestore-labs
GitHub Repository: singlestore-labs/singlestoredb-python
Path: blob/main/singlestoredb/docstring/google.py
469 views
1
"""Google-style docstring parsing."""
2
import inspect
3
import re
4
import typing as T
5
from collections import namedtuple
6
from collections import OrderedDict
7
from enum import IntEnum
8
9
from .common import Docstring
10
from .common import DocstringExample
11
from .common import DocstringMeta
12
from .common import DocstringParam
13
from .common import DocstringRaises
14
from .common import DocstringReturns
15
from .common import DocstringStyle
16
from .common import EXAMPLES_KEYWORDS
17
from .common import PARAM_KEYWORDS
18
from .common import ParseError
19
from .common import RAISES_KEYWORDS
20
from .common import RenderingStyle
21
from .common import RETURNS_KEYWORDS
22
from .common import YIELDS_KEYWORDS
23
24
25
class SectionType(IntEnum):
    """Kinds of docstring sections, by how their content is split up."""

    # For sections like examples.
    SINGULAR = 0

    # For sections like params.
    MULTIPLE = 1

    # For sections like returns or yields.
    SINGULAR_OR_MULTIPLE = 2
36
37
38
class Section(namedtuple('SectionBase', ['title', 'key', 'type'])):
    """A recognized docstring section: its title, metadata key and type."""
40
41
42
# ``name (type)``: group 1 is the argument name, group 2 the text inside the
# parentheses.
GOOGLE_TYPED_ARG_REGEX = re.compile(
    r'\s*(.+?)\s*\(\s*(.*[^\s]+)\s*\)',
)

# ``<text>. Defaults to <value>.``: group 1 is the default value.
GOOGLE_ARG_DESC_REGEX = re.compile(
    r'.*\. Defaults to (.+)\.',
)

# Matches text that begins with a ``spec:`` entry (no colon or whitespace in
# the spec), or whose text before the first colon ends in ``]``.
MULTIPLE_PATTERN = re.compile(
    r'(\s*[^:\s]+:)|([^:]*\]:.*)',
)
45
46
# Sections recognized when no explicit list is given. Titles that share a
# metadata key and section type are grouped; the resulting list holds one
# ``Section`` per title, in the order written here.
DEFAULT_SECTIONS = [
    Section(title, key, section_type)
    for titles, key, section_type in (
        (
            ('Arguments', 'Args', 'Parameters', 'Params'),
            'param',
            SectionType.MULTIPLE,
        ),
        (('Raises', 'Exceptions', 'Except'), 'raises', SectionType.MULTIPLE),
        (('Attributes',), 'attribute', SectionType.MULTIPLE),
        (('Example', 'Examples'), 'examples', SectionType.SINGULAR),
        (('Returns',), 'returns', SectionType.SINGULAR_OR_MULTIPLE),
        (('Yields',), 'yields', SectionType.SINGULAR_OR_MULTIPLE),
    )
    for title in titles
]
60
61
62
class GoogleParser:
    """Parser for Google-style docstrings."""

    def __init__(
        self, sections: T.Optional[T.List[Section]] = None, title_colon: bool = True,
    ):
        """Set up the recognized sections.

        :param sections: Recognized sections; ``None`` or an empty list
            selects ``DEFAULT_SECTIONS``.
        :param title_colon: require colon after section title.
        """
        if not sections:
            sections = DEFAULT_SECTIONS
        self.sections = {s.title: s for s in sections}
        self.title_colon = title_colon
        self._setup()

    def _setup(self) -> None:
        """Compile ``self.titles_re``, the regex that finds section titles.

        A title must occupy a whole line (optionally followed by a colon and
        trailing blanks). Group 1 of a match is the matched title text.
        """
        colon = ':' if self.title_colon else ''
        # Titles are literal text: escape them so that characters such as
        # ``(`` or ``+`` in a custom title neither break compilation nor
        # match text that differs from the registered title.
        alternatives = '|'.join(f'({re.escape(t)})' for t in self.sections)
        self.titles_re = re.compile(
            '^(' + alternatives + ')' + colon + '[ \t\r\f\v]*$',
            flags=re.M,
        )

    def _build_meta(self, text: str, title: str) -> DocstringMeta:
        """Build docstring element.

        :param text: docstring element text
        :param title: title of section containing element
        :returns: the metadata object built from ``text``
        :raises ParseError: if an entry of a multi-entry section contains no
            colon, or if the single-entry builder rejects the section
        """
        section = self.sections[title]

        # Singular sections, and returns/yields bodies that do not look like
        # a ``spec: description`` entry, are kept as one free-form element.
        if section.type == SectionType.SINGULAR or (
            section.type == SectionType.SINGULAR_OR_MULTIPLE
            and not MULTIPLE_PATTERN.match(text)
        ):
            return self._build_single_meta(section, text)

        if ':' not in text:
            raise ParseError(f'Expected a colon in {text!r}.')

        # Split spec and description
        before, desc = text.split(':', 1)

        if '\n' in before:
            # The spec spans several lines: dedent the continuation and
            # join it directly onto the first line.
            first_line, rest = before.split('\n', 1)
            before = first_line + inspect.cleandoc(rest)

        if desc:
            # Drop the single space that conventionally follows the colon.
            if desc[0] == ' ':
                desc = desc[1:]
            if '\n' in desc:
                first_line, rest = desc.split('\n', 1)
                desc = first_line + '\n' + inspect.cleandoc(rest)
            desc = desc.strip('\n')

        return self._build_multi_meta(section, before, desc)

    @staticmethod
    def _build_single_meta(section: Section, desc: str) -> DocstringMeta:
        """Build the element for a section treated as one free-form body.

        :param section: section the text belongs to
        :param desc: full text of the section
        :returns: a metadata object chosen by ``section.key``
        :raises ParseError: if ``section.key`` is a parameter keyword, since
            a parameter needs a name
        """
        if section.key in RETURNS_KEYWORDS | YIELDS_KEYWORDS:
            return DocstringReturns(
                args=[section.key],
                description=desc,
                type_name=None,
                is_generator=section.key in YIELDS_KEYWORDS,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(
                args=[section.key], description=desc, type_name=None,
            )
        if section.key in EXAMPLES_KEYWORDS:
            return DocstringExample(
                args=[section.key], snippet=None, description=desc,
            )
        if section.key in PARAM_KEYWORDS:
            raise ParseError('Expected paramenter name.')
        return DocstringMeta(args=[section.key], description=desc)

    @staticmethod
    def _build_multi_meta(
        section: Section, before: str, desc: str,
    ) -> DocstringMeta:
        """Build the element for one ``spec: description`` entry.

        :param section: section the entry belongs to
        :param before: text before the first colon (the spec)
        :param desc: text after the first colon (the description)
        :returns: a metadata object chosen by ``section.key``
        """
        if section.key in PARAM_KEYWORDS:
            match = GOOGLE_TYPED_ARG_REGEX.match(before)
            if match:
                arg_name, type_name = match.group(1, 2)
                # Optionality is spelled either ``type, optional`` or
                # ``type?``; strip the marker from the type name.
                if type_name.endswith(', optional'):
                    is_optional = True
                    type_name = type_name[:-10]
                elif type_name.endswith('?'):
                    is_optional = True
                    type_name = type_name[:-1]
                else:
                    is_optional = False
            else:
                # No ``(type)`` part: the whole spec is the argument name.
                arg_name, type_name = before, None
                is_optional = None

            match = GOOGLE_ARG_DESC_REGEX.match(desc)
            default = match.group(1) if match else None

            return DocstringParam(
                args=[section.key, before],
                description=desc,
                arg_name=arg_name,
                type_name=type_name,
                is_optional=is_optional,
                default=default,
            )
        if section.key in RETURNS_KEYWORDS | YIELDS_KEYWORDS:
            return DocstringReturns(
                args=[section.key, before],
                description=desc,
                type_name=before,
                is_generator=section.key in YIELDS_KEYWORDS,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(
                args=[section.key, before], description=desc, type_name=before,
            )
        return DocstringMeta(args=[section.key, before], description=desc)

    def add_section(self, section: Section) -> None:
        """Add or replace a section.

        :param section: The new section.
        """
        self.sections[section.title] = section
        # The title regex depends on the set of titles; rebuild it.
        self._setup()

    def parse(self, text: T.Optional[str]) -> Docstring:
        """Parse the Google-style docstring into its components.

        :param text: docstring text; ``None`` or empty yields an empty result
        :returns: parsed docstring
        :raises ParseError: if a multi-entry section has no entry at its
            inferred indent, or an entry cannot be built
        """
        ret = Docstring(style=DocstringStyle.GOOGLE)
        if not text:
            return ret

        # Clean according to PEP-0257
        text = inspect.cleandoc(text)

        # Find first title and split on its position
        match = self.titles_re.search(text)
        if match:
            desc_chunk = text[:match.start()]
            meta_chunk = text[match.start():]
        else:
            desc_chunk = text
            meta_chunk = ''

        # Break description into short and long parts
        parts = desc_chunk.split('\n', 1)
        ret.short_description = parts[0] or None
        if len(parts) > 1:
            long_desc_chunk = parts[1] or ''
            ret.blank_after_short_description = long_desc_chunk.startswith(
                '\n',
            )
            ret.blank_after_long_description = long_desc_chunk.endswith('\n\n')
            ret.long_description = long_desc_chunk.strip() or None

        # Split by sections determined by titles
        matches = list(self.titles_re.finditer(meta_chunk))
        if not matches:
            return ret

        # Each section body runs from the end of its title to the start of
        # the next title (or to the end of the text for the last one).
        section_ends = [m.start() for m in matches[1:]] + [len(meta_chunk)]

        # Keyed by title, so a repeated section keeps only its last body.
        chunks: T.MutableMapping[str, str] = OrderedDict()
        for title_match, end in zip(matches, section_ends):
            title = title_match.group(1)
            if title not in self.sections:
                continue

            # Clear Any Unknown Meta
            # Ref: https://github.com/rr-/docstring_parser/issues/29
            meta_details = meta_chunk[title_match.end():end]
            unknown_meta = re.search(r'\n\S', meta_details)
            if unknown_meta is not None:
                meta_details = meta_details[:unknown_meta.start()]

            chunks[title] = meta_details.strip('\n')
        if not chunks:
            return ret

        # Add elements from each chunk
        for title, chunk in chunks.items():
            # Check for singular elements
            if self.sections[title].type in (
                SectionType.SINGULAR,
                SectionType.SINGULAR_OR_MULTIPLE,
            ):
                part = inspect.cleandoc(chunk)
                ret.meta.append(self._build_meta(part, title))
                continue

            # Determine indent
            indent_match = re.search(r'^\s*', chunk)
            if not indent_match:
                raise ParseError(f'Can\'t infer indent from "{chunk}"')
            indent = indent_match.group()

            # Split based on lines which have exactly that indent
            _re = '^' + indent + r'(?=\S)'
            c_matches = list(re.finditer(_re, chunk, flags=re.M))
            if not c_matches:
                raise ParseError(f'No specification for "{title}": "{chunk}"')
            entry_ends = [m.start() for m in c_matches[1:]] + [len(chunk)]
            for entry_match, end in zip(c_matches, entry_ends):
                part = chunk[entry_match.end():end].strip('\n')
                ret.meta.append(self._build_meta(part, title))

        return ret
292
293
294
def parse(text: T.Optional[str]) -> Docstring:
    """Parse a Google-style docstring with a default ``GoogleParser``.

    :param text: docstring text to parse
    :returns: parsed docstring
    """
    default_parser = GoogleParser()
    return default_parser.parse(text)
300
301
302
def compose(
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    indent: str = ' ',
) -> str:
    """Render a parsed docstring into docstring text.

    Sections are emitted in the order: short description, long description,
    ``Args:``, ``Attributes:``, ``Returns:``, ``Yields:``, ``Raises:``, then
    one section per remaining metadata item. Trailing blank lines are
    removed.

    :param docstring: parsed docstring representation
    :param rendering_style: the style to render docstrings
    :param indent: the characters used as indentation in the docstring string
    :returns: docstring text
    """
    parts: T.List[str] = []

    def process_one(
        one: T.Union[DocstringParam, DocstringReturns, DocstringRaises],
    ) -> None:
        """Append the rendered entry for one param/return/raise item."""
        head = ''

        if isinstance(one, DocstringParam):
            head += one.arg_name or ''
        elif isinstance(one, DocstringReturns):
            head += one.return_name or ''

        # Optional params are marked ``?`` in compact style and
        # ``, optional`` otherwise.
        if isinstance(one, DocstringParam) and one.is_optional:
            optional = (
                '?'
                if rendering_style == RenderingStyle.COMPACT
                else ', optional'
            )
        else:
            optional = ''

        if one.type_name and head:
            head += f' ({one.type_name}{optional}):'
        elif one.type_name:
            head += f'{one.type_name}{optional}:'
        else:
            head += ':'
        head = indent + head

        if one.description and rendering_style == RenderingStyle.EXPANDED:
            # Expanded: the description starts on the line after the head.
            body = f'\n{indent}{indent}'.join(
                [head] + one.description.splitlines(),
            )
            parts.append(body)
        elif one.description:
            # Otherwise the first description line shares the head's line.
            (first, *rest) = one.description.splitlines()
            body = f'\n{indent}{indent}'.join([head + ' ' + first] + rest)
            parts.append(body)
        else:
            parts.append(head)

    def process_sect(name: str, args: T.List[T.Any]) -> None:
        """Append a titled section with its entries, if there are any."""
        if args:
            parts.append(name)
            for arg in args:
                process_one(arg)
            parts.append('')

    if docstring.short_description:
        parts.append(docstring.short_description)
    if docstring.blank_after_short_description:
        parts.append('')

    if docstring.long_description:
        parts.append(docstring.long_description)
    if docstring.blank_after_long_description:
        parts.append('')

    process_sect(
        'Args:', [p for p in docstring.params or [] if p.args[0] == 'param'],
    )

    process_sect(
        'Attributes:',
        [p for p in docstring.params or [] if p.args[0] == 'attribute'],
    )

    process_sect(
        'Returns:',
        [p for p in docstring.many_returns or [] if not p.is_generator],
    )

    process_sect(
        'Yields:', [p for p in docstring.many_returns or [] if p.is_generator],
    )

    process_sect('Raises:', docstring.raises or [])

    if docstring.returns and not docstring.many_returns:
        # Fallback for a docstring exposing a single ``returns`` item but no
        # ``many_returns``. The header depends on whether the item is a
        # generator, and is rendered like every other section: Google style
        # has no dashed underline beneath section titles.
        ret = docstring.returns
        process_sect('Yields:' if ret.is_generator else 'Returns:', [ret])

    for meta in docstring.meta:
        if isinstance(
            meta, (DocstringParam, DocstringReturns, DocstringRaises),
        ):
            continue  # Already handled
        parts.append(meta.args[0].replace('_', '').title() + ':')
        if meta.description:
            lines = [indent + m for m in meta.description.splitlines()]
            parts.append('\n'.join(lines))
        parts.append('')

    # Drop trailing blank entries so the text does not end in empty lines.
    while parts and not parts[-1]:
        parts.pop()

    return '\n'.join(parts)
413
414