# Path: blob/main/singlestoredb/docstring/numpydoc.py
# 469 views
"""Numpydoc-style docstring parsing.12:see: https://numpydoc.readthedocs.io/en/latest/format.html3"""4import inspect5import itertools6import re7import typing as T8from abc import abstractmethod9from textwrap import dedent1011from .common import Docstring12from .common import DocstringDeprecated13from .common import DocstringExample14from .common import DocstringMeta15from .common import DocstringParam16from .common import DocstringRaises17from .common import DocstringReturns18from .common import DocstringStyle19from .common import RenderingStyle202122def _pairwise(23iterable: T.Iterable[T.Any],24end: T.Optional[T.Any] = None,25) -> T.Iterable[T.Tuple[T.Any, T.Any]]:26left, right = itertools.tee(iterable)27next(right, None)28return itertools.zip_longest(left, right, fillvalue=end)293031def _clean_str(string: str) -> T.Optional[str]:32string = string.strip()33if len(string) > 0:34return string35return None363738KV_REGEX = re.compile(r'^[^\s].*$', flags=re.M)39PARAM_KEY_REGEX = re.compile(r'^(?P<name>.*?)(?:\s*:\s*(?P<type>.*?))?$')40PARAM_OPTIONAL_REGEX = re.compile(r'(?P<type>.*?)(?:, optional|\(optional\))$')4142# numpydoc format has no formal grammar for this,43# but we can make some educated guesses...44PARAM_DEFAULT_REGEX = re.compile(45r'(?<!\S)[Dd]efault(?: is | = |: |s to |)\s*(?P<value>[\w\-\.]*\w)',46)4748RETURN_KEY_REGEX = re.compile(r'^(?:(?P<name>.*?)\s*:\s*)?(?P<type>.*?)$')495051class Section:52"""Numpydoc section parser.5354:param title: section title. For most sections, this is a heading like55"Parameters" which appears on its own line, underlined by56en-dashes ('-') on the following line.57:param key: meta key string. 
In the parsed ``DocstringMeta`` instance this58will be the first element of the ``args`` attribute list.59"""6061def __init__(self, title: str, key: str) -> None:62self.title = title63self.key = key6465@property66def title_pattern(self) -> str:67"""Regular expression pattern matching this section's header.6869This pattern will match this instance's ``title`` attribute in70an anonymous group.71"""72dashes = '-' * len(self.title)73return rf'^({self.title})\s*?\n{dashes}\s*$'7475def parse(self, text: str) -> T.Iterable[DocstringMeta]:76"""Parse ``DocstringMeta`` objects from the body of this section.7778:param text: section body text. Should be cleaned with79``inspect.cleandoc`` before parsing.80"""81yield DocstringMeta([self.key], description=_clean_str(text))828384class _KVSection(Section):85"""Base parser for numpydoc sections with key-value syntax.8687E.g. sections that look like this:88key89value90key2 : type91values can also span...92... multiple lines93"""9495@abstractmethod96def _parse_item(self, key: str, value: str) -> DocstringMeta:97return DocstringMeta(args=[key], description=_clean_str(value))9899def parse(self, text: str) -> T.Iterable[DocstringMeta]:100for match, next_match in _pairwise(KV_REGEX.finditer(text)):101start = match.end()102end = next_match.start() if next_match is not None else None103value = text[start:end]104yield self._parse_item(105key=match.group(), value=inspect.cleandoc(value),106)107108109class _SphinxSection(Section):110"""Base parser for numpydoc sections with sphinx-style syntax.111112E.g. sections that look like this:113.. title:: something114possibly over multiple lines115"""116117@property118def title_pattern(self) -> str:119return rf'^\.\.\s*({self.title})\s*::'120121122class ParamSection(_KVSection):123"""Parser for numpydoc parameter sections.124125E.g. any section that looks like this:126arg_name127arg_description128arg_2 : type, optional129descriptions can also span...130... 
multiple lines131"""132133def _parse_item(self, key: str, value: str) -> DocstringParam:134match = PARAM_KEY_REGEX.match(key)135arg_name = type_name = is_optional = None136if match is not None:137arg_name = match.group('name')138type_name = match.group('type')139if type_name is not None:140optional_match = PARAM_OPTIONAL_REGEX.match(type_name)141if optional_match is not None:142type_name = optional_match.group('type')143is_optional = True144else:145is_optional = False146147default = None148if len(value) > 0:149default_match = PARAM_DEFAULT_REGEX.search(value)150if default_match is not None:151default = default_match.group('value')152153return DocstringParam(154args=[self.key, str(arg_name)],155description=_clean_str(value),156arg_name=str(arg_name),157type_name=type_name,158is_optional=is_optional,159default=default,160)161162163class RaisesSection(_KVSection):164"""Parser for numpydoc raises sections.165166E.g. any section that looks like this:167ValueError168A description of what might raise ValueError169"""170171def _parse_item(self, key: str, value: str) -> DocstringRaises:172return DocstringRaises(173args=[self.key, key],174description=_clean_str(value),175type_name=key if len(key) > 0 else None,176)177178179class ReturnsSection(_KVSection):180"""Parser for numpydoc returns sections.181182E.g. 
any section that looks like this:183return_name : type184A description of this returned value185another_type186Return names are optional, types are required187"""188189is_generator = False190191def _parse_item(self, key: str, value: str) -> DocstringReturns:192match = RETURN_KEY_REGEX.match(key)193if match is not None:194return_name = match.group('name')195type_name = match.group('type')196else:197return_name = None198type_name = None199200return DocstringReturns(201args=[self.key],202description=_clean_str(value),203type_name=type_name,204is_generator=self.is_generator,205return_name=return_name,206)207208209class YieldsSection(ReturnsSection):210"""Parser for numpydoc generator "yields" sections."""211212is_generator = True213214215class DeprecationSection(_SphinxSection):216"""Parser for numpydoc "deprecation warning" sections."""217218def parse(self, text: str) -> T.Iterable[DocstringDeprecated]:219version, desc, *_ = text.split(sep='\n', maxsplit=1) + [None, None]220221if desc is not None:222desc = _clean_str(inspect.cleandoc(desc))223224yield DocstringDeprecated(225args=[self.key], description=desc, version=_clean_str(str(version)),226)227228229class ExamplesSection(Section):230"""Parser for numpydoc examples sections.231232E.g. any section that looks like this:233>>> import numpy.matlib234>>> np.matlib.empty((2, 2)) # filled with random data235matrix([[ 6.76425276e-320, 9.79033856e-307], # random236[ 7.39337286e-309, 3.22135945e-309]])237>>> np.matlib.empty((2, 2), dtype=int)238matrix([[ 6600475, 0], # random239[ 6586976, 22740995]])240"""241242def parse(self, text: str) -> T.Iterable[DocstringExample]:243"""Parse ``DocstringExample`` objects from the body of this section.244245:param text: section body text. 
Should be cleaned with246``inspect.cleandoc`` before parsing.247"""248lines = [x.rstrip() for x in dedent(text).strip().splitlines()]249while lines:250snippet_lines = []251description_lines = []252post_snippet_lines = []253254# Parse description of snippet255while lines:256if re.match(r'^(>>>|sql>) ', lines[0]):257break258description_lines.append(lines.pop(0))259260# Parse code of snippet261while lines:262if not re.match(r'^(>>>|sql>|\.\.\.) ', lines[0]):263break264snippet_lines.append(lines.pop(0))265266# Parse output of snippet267while lines:268# Bail out at blank lines269if not lines[0]:270lines.pop(0)271break272# Bail out if a new snippet is started273elif re.match(r'^(>>>|sql>) ', lines[0]):274break275else:276snippet_lines.append(lines.pop(0))277278# if there is following text, but no more snippets,279# make this a post description.280if not [x for x in lines if re.match(r'^(>>>|sql>) ', x)]:281post_snippet_lines.extend(lines)282lines = []283284yield DocstringExample(285[self.key],286snippet='\n'.join(snippet_lines).strip() if snippet_lines else None,287description='\n'.join(description_lines).strip(),288post_snippet='\n'.join(post_snippet_lines).strip(),289)290291292DEFAULT_SECTIONS = [293ParamSection('Parameters', 'param'),294ParamSection('Params', 'param'),295ParamSection('Arguments', 'param'),296ParamSection('Args', 'param'),297ParamSection('Other Parameters', 'other_param'),298ParamSection('Other Params', 'other_param'),299ParamSection('Other Arguments', 'other_param'),300ParamSection('Other Args', 'other_param'),301ParamSection('Receives', 'receives'),302ParamSection('Receive', 'receives'),303RaisesSection('Raises', 'raises'),304RaisesSection('Raise', 'raises'),305RaisesSection('Warns', 'warns'),306RaisesSection('Warn', 'warns'),307ParamSection('Attributes', 'attribute'),308ParamSection('Attribute', 'attribute'),309ReturnsSection('Returns', 'returns'),310ReturnsSection('Return', 'returns'),311YieldsSection('Yields', 'yields'),312YieldsSection('Yield', 
'yields'),313ExamplesSection('Examples', 'examples'),314ExamplesSection('Example', 'examples'),315Section('Warnings', 'warnings'),316Section('Warning', 'warnings'),317Section('See Also', 'see_also'),318Section('Related', 'see_also'),319Section('Notes', 'notes'),320Section('Note', 'notes'),321Section('References', 'references'),322Section('Reference', 'references'),323DeprecationSection('deprecated', 'deprecation'),324]325326327class NumpydocParser:328"""Parser for numpydoc-style docstrings."""329330def __init__(self, sections: T.Optional[T.List[Section]] = None):331"""Setup sections.332333:param sections: Recognized sections or None to defaults.334"""335sects = sections or DEFAULT_SECTIONS336self.sections = {s.title: s for s in sects}337self._setup()338339def _setup(self) -> None:340self.titles_re = re.compile(341r'|'.join(s.title_pattern for s in self.sections.values()),342flags=re.M,343)344345def add_section(self, section: Section) -> None:346"""Add or replace a section.347348:param section: The new section.349"""350351self.sections[section.title] = section352self._setup()353354def parse(self, text: T.Optional[str]) -> Docstring:355"""Parse the numpy-style docstring into its components.356357:returns: parsed docstring358"""359ret = Docstring(style=DocstringStyle.NUMPYDOC)360if not text:361return ret362363# Clean according to PEP-0257364text = inspect.cleandoc(text)365366# Find first title and split on its position367match = self.titles_re.search(text)368if match:369desc_chunk = text[:match.start()]370meta_chunk = text[match.start():]371else:372desc_chunk = text373meta_chunk = ''374375# Break description into short and long parts376parts = desc_chunk.split('\n', 1)377ret.short_description = parts[0] or None378if len(parts) > 1:379long_desc_chunk = parts[1] or ''380ret.blank_after_short_description = long_desc_chunk.startswith(381'\n',382)383ret.blank_after_long_description = long_desc_chunk.endswith('\n\n')384ret.long_description = long_desc_chunk.strip() or 
None385386for match, nextmatch in _pairwise(self.titles_re.finditer(meta_chunk)):387if not match:388raise ValueError(389'No section title found in docstring: %s' % meta_chunk,390)391title = next(g for g in match.groups() if g is not None)392factory = self.sections[title]393394# section chunk starts after the header,395# ends at the start of the next header396start = match.end()397end = nextmatch.start() if nextmatch is not None else None398ret.meta.extend(factory.parse(meta_chunk[start:end]))399400return ret401402403def parse(text: T.Optional[str]) -> Docstring:404"""Parse the numpy-style docstring into its components.405406:returns: parsed docstring407"""408return NumpydocParser().parse(text)409410411def compose(412# pylint: disable=W0613413docstring: Docstring,414rendering_style: RenderingStyle = RenderingStyle.COMPACT,415indent: str = ' ',416) -> str:417"""Render a parsed docstring into docstring text.418419:param docstring: parsed docstring representation420:param rendering_style: the style to render docstrings421:param indent: the characters used as indentation in the docstring string422:returns: docstring text423"""424425def process_one(426one: T.Union[DocstringParam, DocstringReturns, DocstringRaises],427) -> None:428head: T.Optional[str] = None429if isinstance(one, DocstringParam):430head = one.arg_name431elif isinstance(one, DocstringReturns):432head = one.return_name433434if one.type_name and head:435head += f' : {one.type_name}'436elif one.type_name:437head = one.type_name438elif not head:439head = ''440441if isinstance(one, DocstringParam) and one.is_optional:442head += ', optional'443444if one.description:445body = f'\n{indent}'.join([head] + one.description.splitlines())446parts.append(body)447else:448parts.append(head)449450def process_sect(name: str, args: T.List[T.Any]) -> None:451if args:452parts.append('')453parts.append(name)454parts.append('-' * len(parts[-1]))455for arg in args:456process_one(arg)457458parts: T.List[str] = []459if 
docstring.short_description:460parts.append(docstring.short_description)461if docstring.blank_after_short_description:462parts.append('')463464if docstring.deprecation:465first = '.. deprecated::'466if docstring.deprecation.version:467first += f' {docstring.deprecation.version}'468if docstring.deprecation.description:469rest = docstring.deprecation.description.splitlines()470else:471rest = []472sep = f'\n{indent}'473parts.append(sep.join([first] + rest))474475if docstring.long_description:476parts.append(docstring.long_description)477if docstring.blank_after_long_description:478parts.append('')479480process_sect(481'Parameters',482[item for item in docstring.params or [] if item.args[0] == 'param'],483)484485process_sect(486'Attributes',487[488item489for item in docstring.params or []490if item.args[0] == 'attribute'491],492)493494process_sect(495'Returns',496[497item498for item in docstring.many_returns or []499if not item.is_generator500],501)502503process_sect(504'Yields',505[item for item in docstring.many_returns or [] if item.is_generator],506)507508if docstring.returns and not docstring.many_returns:509ret = docstring.returns510parts.append('Yields' if ret else 'Returns')511parts.append('-' * len(parts[-1]))512process_one(ret)513514process_sect(515'Receives',516[517item518for item in docstring.params or []519if item.args[0] == 'receives'520],521)522523process_sect(524'Other Parameters',525[526item527for item in docstring.params or []528if item.args[0] == 'other_param'529],530)531532process_sect(533'Raises',534[item for item in docstring.raises or [] if item.args[0] == 'raises'],535)536537process_sect(538'Warns',539[item for item in docstring.raises or [] if item.args[0] == 'warns'],540)541542for meta in docstring.meta:543if isinstance(544meta,545(546DocstringDeprecated,547DocstringParam,548DocstringReturns,549DocstringRaises,550),551):552continue # Already handled553554parts.append('')555parts.append(meta.args[0].replace('_', '').title())556parts.append('-' 
* len(meta.args[0]))557558if meta.description:559parts.append(meta.description)560561return '\n'.join(parts)562563564