Path: blob/main/singlestoredb/docstring/google.py
469 views
"""Google-style docstring parsing."""
import inspect
import re
import typing as T
from collections import namedtuple
from collections import OrderedDict
from enum import IntEnum

from .common import Docstring
from .common import DocstringExample
from .common import DocstringMeta
from .common import DocstringParam
from .common import DocstringRaises
from .common import DocstringReturns
from .common import DocstringStyle
from .common import EXAMPLES_KEYWORDS
from .common import PARAM_KEYWORDS
from .common import ParseError
from .common import RAISES_KEYWORDS
from .common import RenderingStyle
from .common import RETURNS_KEYWORDS
from .common import YIELDS_KEYWORDS


class SectionType(IntEnum):
    """Types of sections."""

    SINGULAR = 0
    """For sections like examples."""

    MULTIPLE = 1
    """For sections like params."""

    SINGULAR_OR_MULTIPLE = 2
    """For sections like returns or yields."""


class Section(namedtuple('SectionBase', 'title key type')):
    """A docstring section."""


# "name (type)" at the start of a parameter entry.
GOOGLE_TYPED_ARG_REGEX = re.compile(r'\s*(.+?)\s*\(\s*(.*[^\s]+)\s*\)')
# Trailing "... Defaults to <value>." sentence inside a description.
GOOGLE_ARG_DESC_REGEX = re.compile(r'.*\. Defaults to (.+)\.')
# Decides whether a SINGULAR_OR_MULTIPLE section body starts with "spec:".
MULTIPLE_PATTERN = re.compile(r'(\s*[^:\s]+:)|([^:]*\]:.*)')

DEFAULT_SECTIONS = [
    Section('Arguments', 'param', SectionType.MULTIPLE),
    Section('Args', 'param', SectionType.MULTIPLE),
    Section('Parameters', 'param', SectionType.MULTIPLE),
    Section('Params', 'param', SectionType.MULTIPLE),
    Section('Raises', 'raises', SectionType.MULTIPLE),
    Section('Exceptions', 'raises', SectionType.MULTIPLE),
    Section('Except', 'raises', SectionType.MULTIPLE),
    Section('Attributes', 'attribute', SectionType.MULTIPLE),
    Section('Example', 'examples', SectionType.SINGULAR),
    Section('Examples', 'examples', SectionType.SINGULAR),
    Section('Returns', 'returns', SectionType.SINGULAR_OR_MULTIPLE),
    Section('Yields', 'yields', SectionType.SINGULAR_OR_MULTIPLE),
]


class GoogleParser:
    """Parser for Google-style docstrings."""

    def __init__(
        self, sections: T.Optional[T.List[Section]] = None, title_colon: bool = True,
    ):
        """Setup sections.

        :param sections: Recognized sections; None or an empty list selects
            DEFAULT_SECTIONS.
        :param title_colon: require colon after section title.
        """
        if not sections:
            sections = DEFAULT_SECTIONS
        self.sections = {s.title: s for s in sections}
        self.title_colon = title_colon
        self._setup()

    def _setup(self) -> None:
        """Compile ``self.titles_re`` from the currently registered titles."""
        colon = ':' if self.title_colon else ''
        # Escape titles so that one containing regex metacharacters is matched
        # literally. Each title keeps its own group; group 1 is the outer
        # group holding whichever title matched.
        alternatives = '|'.join(f'({re.escape(t)})' for t in self.sections)
        self.titles_re = re.compile(
            '^(' + alternatives + ')' + colon + '[ \t\r\f\v]*$',
            flags=re.M,
        )

    def _build_meta(self, text: str, title: str) -> DocstringMeta:
        """Build docstring element.

        :param text: docstring element text
        :param title: title of section containing element
        :return: the meta object built for this element
        :raises ParseError: if a multi-entry element contains no colon
        """
        section = self.sections[title]

        # SINGULAR sections, and SINGULAR_OR_MULTIPLE ones whose text does
        # not start with a "spec:" prefix, are treated as one free-form entry.
        if (
            section.type == SectionType.SINGULAR_OR_MULTIPLE
            and not MULTIPLE_PATTERN.match(text)
        ) or section.type == SectionType.SINGULAR:
            return self._build_single_meta(section, text)

        if ':' not in text:
            raise ParseError(f'Expected a colon in {text!r}.')

        # Split spec and description
        before, desc = text.split(':', 1)

        if before and '\n' in before:
            # The spec wrapped onto more than one line: join the first line
            # with the dedented remainder.
            first_line, rest = before.split('\n', 1)
            before = first_line + inspect.cleandoc(rest)

        if desc:
            # Drop the single space that follows the colon.
            desc = desc[1:] if desc[0] == ' ' else desc
            if '\n' in desc:
                first_line, rest = desc.split('\n', 1)
                desc = first_line + '\n' + inspect.cleandoc(rest)
            desc = desc.strip('\n')

        return self._build_multi_meta(section, before, desc)

    @staticmethod
    def _build_single_meta(section: Section, desc: str) -> DocstringMeta:
        """Build the meta object for a section that holds one free-form entry.

        :param section: section the text belongs to
        :param desc: the full text of the section
        :return: the meta object matching the section key
        :raises ParseError: if the section is a parameter section, which
            cannot be expressed without a parameter name
        """
        if section.key in RETURNS_KEYWORDS | YIELDS_KEYWORDS:
            return DocstringReturns(
                args=[section.key],
                description=desc,
                type_name=None,
                is_generator=section.key in YIELDS_KEYWORDS,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(
                args=[section.key], description=desc, type_name=None,
            )
        if section.key in EXAMPLES_KEYWORDS:
            return DocstringExample(
                args=[section.key], snippet=None, description=desc,
            )
        if section.key in PARAM_KEYWORDS:
            raise ParseError('Expected paramenter name.')
        return DocstringMeta(args=[section.key], description=desc)

    @staticmethod
    def _build_multi_meta(
        section: Section, before: str, desc: str,
    ) -> DocstringMeta:
        """Build the meta object for one "spec: description" entry.

        :param section: section the entry belongs to
        :param before: text before the first colon (name and/or type)
        :param desc: text after the first colon
        :return: the meta object matching the section key
        """
        if section.key in PARAM_KEYWORDS:
            typed = GOOGLE_TYPED_ARG_REGEX.match(before)
            if typed:
                arg_name, type_name = typed.group(1, 2)
                if type_name.endswith(', optional'):
                    is_optional = True
                    type_name = type_name[:-10]  # len(', optional') == 10
                elif type_name.endswith('?'):
                    is_optional = True
                    type_name = type_name[:-1]
                else:
                    is_optional = False
            else:
                # No "(type)" part: optionality is unknown, not False.
                arg_name, type_name = before, None
                is_optional = None

            default_match = GOOGLE_ARG_DESC_REGEX.match(desc)
            default = default_match.group(1) if default_match else None

            return DocstringParam(
                args=[section.key, before],
                description=desc,
                arg_name=arg_name,
                type_name=type_name,
                is_optional=is_optional,
                default=default,
            )
        if section.key in RETURNS_KEYWORDS | YIELDS_KEYWORDS:
            return DocstringReturns(
                args=[section.key, before],
                description=desc,
                type_name=before,
                is_generator=section.key in YIELDS_KEYWORDS,
            )
        if section.key in RAISES_KEYWORDS:
            return DocstringRaises(
                args=[section.key, before], description=desc, type_name=before,
            )
        return DocstringMeta(args=[section.key, before], description=desc)

    def add_section(self, section: Section) -> None:
        """Add or replace a section.

        :param section: The new section.
        """
        self.sections[section.title] = section
        self._setup()

    def parse(self, text: T.Optional[str]) -> Docstring:
        """Parse the Google-style docstring into its components.

        :param text: raw docstring text; None or empty gives an empty result
        :returns: parsed docstring
        :raises ParseError: if a multi-entry section has no entries or an
            entry is malformed
        """
        ret = Docstring(style=DocstringStyle.GOOGLE)
        if not text:
            return ret

        # Clean according to PEP-0257
        text = inspect.cleandoc(text)

        # Find first title and split on its position
        first_title = self.titles_re.search(text)
        if first_title:
            desc_chunk = text[:first_title.start()]
            meta_chunk = text[first_title.start():]
        else:
            desc_chunk = text
            meta_chunk = ''

        # Break description into short and long parts
        parts = desc_chunk.split('\n', 1)
        ret.short_description = parts[0] or None
        if len(parts) > 1:
            long_desc_chunk = parts[1] or ''
            ret.blank_after_short_description = long_desc_chunk.startswith(
                '\n',
            )
            ret.blank_after_long_description = long_desc_chunk.endswith('\n\n')
            ret.long_description = long_desc_chunk.strip() or None

        # Split by sections determined by titles: each section body runs from
        # the end of its title to the start of the next title (or the end).
        matches = list(self.titles_re.finditer(meta_chunk))
        if not matches:
            return ret
        section_ends = [m.start() for m in matches[1:]] + [len(meta_chunk)]

        chunks = OrderedDict()  # type: T.MutableMapping[str,str]
        for title_match, end in zip(matches, section_ends):
            title = title_match.group(1)
            if title not in self.sections:
                continue

            # Clear Any Unknown Meta
            # Ref: https://github.com/rr-/docstring_parser/issues/29
            # A non-indented line inside the body ends the section.
            meta_details = meta_chunk[title_match.end():end]
            unknown_meta = re.search(r'\n\S', meta_details)
            if unknown_meta is not None:
                meta_details = meta_details[: unknown_meta.start()]

            chunks[title] = meta_details.strip('\n')
        if not chunks:
            return ret

        # Add elements from each chunk
        for title, chunk in chunks.items():
            # Determine indent
            indent_match = re.search(r'^\s*', chunk)
            if not indent_match:
                raise ParseError(f'Can\'t infer indent from "{chunk}"')
            indent = indent_match.group()

            # Check for singular elements
            if self.sections[title].type in [
                SectionType.SINGULAR,
                SectionType.SINGULAR_OR_MULTIPLE,
            ]:
                part = inspect.cleandoc(chunk)
                ret.meta.append(self._build_meta(part, title))
                continue

            # Split based on lines which have exactly that indent
            _re = '^' + indent + r'(?=\S)'
            c_matches = list(re.finditer(_re, chunk, flags=re.M))
            if not c_matches:
                raise ParseError(f'No specification for "{title}": "{chunk}"')
            entry_ends = [m.start() for m in c_matches[1:]] + [len(chunk)]
            for entry_match, entry_end in zip(c_matches, entry_ends):
                part = chunk[entry_match.end():entry_end].strip('\n')
                ret.meta.append(self._build_meta(part, title))

        return ret


def parse(text: T.Optional[str]) -> Docstring:
    """Parse the Google-style docstring into its components.

    :param text: raw docstring text
    :returns: parsed docstring
    """
    return GoogleParser().parse(text)


def compose(
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    # NOTE(review): the default below is reproduced as it appears in the
    # extracted source, which looks whitespace-collapsed; confirm against the
    # repository whether the intended default is four spaces.
    indent: str = ' ',
) -> str:
    """Render a parsed docstring into docstring text.

    :param docstring: parsed docstring representation
    :param rendering_style: the style to render docstrings
    :param indent: the characters used as indentation in the docstring string
    :returns: docstring text
    """
    parts: T.List[str] = []

    def process_one(
        one: T.Union[DocstringParam, DocstringReturns, DocstringRaises],
    ) -> None:
        """Append the rendered text of a single entry to ``parts``."""
        head = ''

        if isinstance(one, DocstringParam):
            head += one.arg_name or ''
        elif isinstance(one, DocstringReturns):
            head += one.return_name or ''

        if isinstance(one, DocstringParam) and one.is_optional:
            optional = (
                '?'
                if rendering_style == RenderingStyle.COMPACT
                else ', optional'
            )
        else:
            optional = ''

        if one.type_name and head:
            head += f' ({one.type_name}{optional}):'
        elif one.type_name:
            head += f'{one.type_name}{optional}:'
        else:
            head += ':'
        head = indent + head

        if one.description and rendering_style == RenderingStyle.EXPANDED:
            # Expanded: the description starts on the line after the head.
            body = f'\n{indent}{indent}'.join(
                [head] + one.description.splitlines(),
            )
            parts.append(body)
        elif one.description:
            # Otherwise the first description line shares the head's line.
            (first, *rest) = one.description.splitlines()
            body = f'\n{indent}{indent}'.join([head + ' ' + first] + rest)
            parts.append(body)
        else:
            parts.append(head)

    def process_sect(name: str, args: T.List[T.Any]) -> None:
        """Append a titled section followed by a blank line, if non-empty."""
        if args:
            parts.append(name)
            for arg in args:
                process_one(arg)
            parts.append('')

    if docstring.short_description:
        parts.append(docstring.short_description)
    if docstring.blank_after_short_description:
        parts.append('')

    if docstring.long_description:
        parts.append(docstring.long_description)
    if docstring.blank_after_long_description:
        parts.append('')

    process_sect(
        'Args:', [p for p in docstring.params or [] if p.args[0] == 'param'],
    )

    process_sect(
        'Attributes:',
        [p for p in docstring.params or [] if p.args[0] == 'attribute'],
    )

    process_sect(
        'Returns:',
        [p for p in docstring.many_returns or [] if not p.is_generator],
    )

    process_sect(
        'Yields:', [p for p in docstring.many_returns or [] if p.is_generator],
    )

    process_sect('Raises:', docstring.raises or [])

    # Fallback for a docstring object that exposes `returns` but an empty
    # `many_returns`. The title follows the entry's generator flag, and the
    # section is rendered like every other one (no underline).
    if docstring.returns and not docstring.many_returns:
        ret = docstring.returns
        process_sect('Yields:' if ret.is_generator else 'Returns:', [ret])

    for meta in docstring.meta:
        if isinstance(
            meta, (DocstringParam, DocstringReturns, DocstringRaises),
        ):
            continue  # Already handled
        parts.append(meta.args[0].replace('_', '').title() + ':')
        if meta.description:
            lines = [indent + m for m in meta.description.splitlines()]
            parts.append('\n'.join(lines))
        parts.append('')

    # Drop trailing blank lines.
    while parts and not parts[-1]:
        parts.pop()

    return '\n'.join(parts)