Path: blob/master/venv/Lib/site-packages/lxml/doctestcompare.py
811 views
"""1lxml-based doctest output comparison.23Note: normally, you should just import the `lxml.usedoctest` and4`lxml.html.usedoctest` modules from within a doctest, instead of this5one::67>>> import lxml.usedoctest # for XML output89>>> import lxml.html.usedoctest # for HTML output1011To use this module directly, you must call ``lxmldoctest.install()``,12which will cause doctest to use this in all subsequent calls.1314This changes the way output is checked and comparisons are made for15XML or HTML-like content.1617XML or HTML content is noticed because the example starts with ``<``18(it's HTML if it starts with ``<html``). You can also use the19``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.2021Some rough wildcard-like things are allowed. Whitespace is generally22ignored (except in attributes). In text (attributes and text in the23body) you can use ``...`` as a wildcard. In an example it also24matches any trailing tags in the element, though it does not match25leading tags. You may create a tag ``<any>`` or include an ``any``26attribute in the tag. An ``any`` tag matches any tag, while the27attribute matches any and all attributes.2829When a match fails, the reformatted example and gotten text is30displayed (indented), and a rough diff-like output is given. Anything31marked with ``+`` is in the output but wasn't supposed to be, and32similarly ``-`` means its in the example but wasn't in the output.3334You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``35"""3637from lxml import etree38import sys39import re40import doctest41try:42from html import escape as html_escape43except ImportError:44from cgi import escape as html_escape4546__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',47'LHTMLOutputChecker', 'install', 'temp_install']4849try:50_basestring = basestring51except NameError:52_basestring = (str, bytes)5354_IS_PYTHON_3 = sys.version_info[0] >= 35556PARSE_HTML = doctest.register_optionflag('PARSE_HTML')57PARSE_XML = doctest.register_optionflag('PARSE_XML')58NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')5960OutputChecker = doctest.OutputChecker6162def strip(v):63if v is None:64return None65else:66return v.strip()6768def norm_whitespace(v):69return _norm_whitespace_re.sub(' ', v)7071_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)7273def html_fromstring(html):74return etree.fromstring(html, _html_parser)7576# We use this to distinguish repr()s from elements:77_repr_re = re.compile(r'^<[^>]+ (at|object) ')78_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')7980class LXMLOutputChecker(OutputChecker):8182empty_tags = (83'param', 'img', 'area', 'br', 'basefont', 'input',84'base', 'meta', 'link', 'col')8586def get_default_parser(self):87return etree.XML8889def check_output(self, want, got, optionflags):90alt_self = getattr(self, '_temp_override_self', None)91if alt_self is not None:92super_method = self._temp_call_super_check_output93self = alt_self94else:95super_method = OutputChecker.check_output96parser = self.get_parser(want, got, optionflags)97if not parser:98return super_method(99self, want, got, optionflags)100try:101want_doc = parser(want)102except etree.XMLSyntaxError:103return False104try:105got_doc = parser(got)106except etree.XMLSyntaxError:107return False108return self.compare_docs(want_doc, got_doc)109110def get_parser(self, want, got, optionflags):111parser = None112if NOPARSE_MARKUP & optionflags:113return None114if PARSE_HTML & optionflags:115parser = html_fromstring116elif PARSE_XML & optionflags:117parser = etree.XML118elif (want.strip().lower().startswith('<html')119and got.strip().startswith('<html')):120parser = html_fromstring121elif (self._looks_like_markup(want)122and self._looks_like_markup(got)):123parser = self.get_default_parser()124return parser125126def _looks_like_markup(self, s):127s = s.strip()128return (s.startswith('<')129and not _repr_re.search(s))130131def compare_docs(self, want, got):132if not self.tag_compare(want.tag, got.tag):133return False134if not self.text_compare(want.text, got.text, True):135return False136if not self.text_compare(want.tail, got.tail, True):137return False138if 'any' not in want.attrib:139want_keys = sorted(want.attrib.keys())140got_keys = sorted(got.attrib.keys())141if want_keys != got_keys:142return False143for key in want_keys:144if not self.text_compare(want.attrib[key], got.attrib[key], False):145return False146if want.text != '...' or len(want):147want_children = list(want)148got_children = list(got)149while want_children or got_children:150if not want_children or not got_children:151return False152want_first = want_children.pop(0)153got_first = got_children.pop(0)154if not self.compare_docs(want_first, got_first):155return False156if not got_children and want_first.tail == '...':157break158return True159160def text_compare(self, want, got, strip):161want = want or ''162got = got or ''163if strip:164want = norm_whitespace(want).strip()165got = norm_whitespace(got).strip()166want = '^%s$' % re.escape(want)167want = want.replace(r'\.\.\.', '.*')168if re.search(want, got):169return True170else:171return False172173def tag_compare(self, want, got):174if want == 'any':175return True176if (not isinstance(want, _basestring)177or not isinstance(got, _basestring)):178return want == got179want = want or ''180got = got or ''181if want.startswith('{...}'):182# Ellipsis on the namespace183return want.split('}')[-1] == got.split('}')[-1]184else:185return want == got186187def output_difference(self, example, got, optionflags):188want = example.want189parser = self.get_parser(want, got, optionflags)190errors = []191if parser is not None:192try:193want_doc = parser(want)194except etree.XMLSyntaxError:195e = sys.exc_info()[1]196errors.append('In example: %s' % e)197try:198got_doc = parser(got)199except etree.XMLSyntaxError:200e = sys.exc_info()[1]201errors.append('In actual output: %s' % e)202if parser is None or errors:203value = OutputChecker.output_difference(204self, example, got, optionflags)205if errors:206errors.append(value)207return '\n'.join(errors)208else:209return value210html = parser is html_fromstring211diff_parts = ['Expected:',212self.format_doc(want_doc, html, 2),213'Got:',214self.format_doc(got_doc, html, 2),215'Diff:',216self.collect_diff(want_doc, got_doc, html, 2)]217return '\n'.join(diff_parts)218219def html_empty_tag(self, el, html=True):220if not html:221return False222if el.tag not in self.empty_tags:223return False224if el.text or len(el):225# This shouldn't happen (contents in an empty tag)226return False227return True228229def format_doc(self, doc, html, indent, prefix=''):230parts = []231if not len(doc):232# No children...233parts.append(' '*indent)234parts.append(prefix)235parts.append(self.format_tag(doc))236if not self.html_empty_tag(doc, html):237if strip(doc.text):238parts.append(self.format_text(doc.text))239parts.append(self.format_end_tag(doc))240if strip(doc.tail):241parts.append(self.format_text(doc.tail))242parts.append('\n')243return ''.join(parts)244parts.append(' '*indent)245parts.append(prefix)246parts.append(self.format_tag(doc))247if not self.html_empty_tag(doc, html):248parts.append('\n')249if strip(doc.text):250parts.append(' '*indent)251parts.append(self.format_text(doc.text))252parts.append('\n')253for el in doc:254parts.append(self.format_doc(el, html, indent+2))255parts.append(' '*indent)256parts.append(self.format_end_tag(doc))257parts.append('\n')258if strip(doc.tail):259parts.append(' '*indent)260parts.append(self.format_text(doc.tail))261parts.append('\n')262return ''.join(parts)263264def format_text(self, text, strip=True):265if text is None:266return ''267if strip:268text = text.strip()269return html_escape(text, 1)270271def format_tag(self, el):272attrs = []273if isinstance(el, etree.CommentBase):274# FIXME: probably PIs should be handled specially too?275return '<!--'276for name, value in sorted(el.attrib.items()):277attrs.append('%s="%s"' % (name, self.format_text(value, False)))278if not attrs:279return '<%s>' % el.tag280return '<%s %s>' % (el.tag, ' '.join(attrs))281282def format_end_tag(self, el):283if isinstance(el, etree.CommentBase):284# FIXME: probably PIs should be handled specially too?285return '-->'286return '</%s>' % el.tag287288def collect_diff(self, want, got, html, indent):289parts = []290if not len(want) and not len(got):291parts.append(' '*indent)292parts.append(self.collect_diff_tag(want, got))293if not self.html_empty_tag(got, html):294parts.append(self.collect_diff_text(want.text, got.text))295parts.append(self.collect_diff_end_tag(want, got))296parts.append(self.collect_diff_text(want.tail, got.tail))297parts.append('\n')298return ''.join(parts)299parts.append(' '*indent)300parts.append(self.collect_diff_tag(want, got))301parts.append('\n')302if strip(want.text) or strip(got.text):303parts.append(' '*indent)304parts.append(self.collect_diff_text(want.text, got.text))305parts.append('\n')306want_children = list(want)307got_children = list(got)308while want_children or got_children:309if not want_children:310parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+'))311continue312if not got_children:313parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-'))314continue315parts.append(self.collect_diff(316want_children.pop(0), got_children.pop(0), html, indent+2))317parts.append(' '*indent)318parts.append(self.collect_diff_end_tag(want, got))319parts.append('\n')320if strip(want.tail) or strip(got.tail):321parts.append(' '*indent)322parts.append(self.collect_diff_text(want.tail, got.tail))323parts.append('\n')324return ''.join(parts)325326def collect_diff_tag(self, want, got):327if not self.tag_compare(want.tag, got.tag):328tag = '%s (got: %s)' % (want.tag, got.tag)329else:330tag = got.tag331attrs = []332any = want.tag == 'any' or 'any' in want.attrib333for name, value in sorted(got.attrib.items()):334if name not in want.attrib and not any:335attrs.append('+%s="%s"' % (name, self.format_text(value, False)))336else:337if name in want.attrib:338text = self.collect_diff_text(want.attrib[name], value, False)339else:340text = self.format_text(value, False)341attrs.append('%s="%s"' % (name, text))342if not any:343for name, value in sorted(want.attrib.items()):344if name in got.attrib:345continue346attrs.append('-%s="%s"' % (name, self.format_text(value, False)))347if attrs:348tag = '<%s %s>' % (tag, ' '.join(attrs))349else:350tag = '<%s>' % tag351return tag352353def collect_diff_end_tag(self, want, got):354if want.tag != got.tag:355tag = '%s (got: %s)' % (want.tag, got.tag)356else:357tag = got.tag358return '</%s>' % tag359360def collect_diff_text(self, want, got, strip=True):361if self.text_compare(want, got, strip):362if not got:363return ''364return self.format_text(got, strip)365text = '%s (got: %s)' % (want, got)366return self.format_text(text, strip)367368class LHTMLOutputChecker(LXMLOutputChecker):369def get_default_parser(self):370return html_fromstring371372def install(html=False):373"""374Install doctestcompare for all future doctests.375376If html is true, then by default the HTML parser will be used;377otherwise the XML parser is used.378"""379if html:380doctest.OutputChecker = LHTMLOutputChecker381else:382doctest.OutputChecker = LXMLOutputChecker383384def temp_install(html=False, del_module=None):385"""386Use this *inside* a doctest to enable this checker for this387doctest only.388389If html is true, then by default the HTML parser will be used;390otherwise the XML parser is used.391"""392if html:393Checker = LHTMLOutputChecker394else:395Checker = LXMLOutputChecker396frame = _find_doctest_frame()397dt_self = frame.f_locals['self']398checker = Checker()399old_checker = dt_self._checker400dt_self._checker = checker401# The unfortunate thing is that there is a local variable 'check'402# in the function that runs the doctests, that is a bound method403# into the output checker. We have to update that. We can't404# modify the frame, so we have to modify the object in place. The405# only way to do this is to actually change the func_code406# attribute of the method. We change it, and then wait for407# __record_outcome to be run, which signals the end of the __run408# method, at which point we restore the previous check_output409# implementation.410if _IS_PYTHON_3:411check_func = frame.f_locals['check'].__func__412checker_check_func = checker.check_output.__func__413else:414check_func = frame.f_locals['check'].im_func415checker_check_func = checker.check_output.im_func416# Because we can't patch up func_globals, this is the only global417# in check_output that we care about:418doctest.etree = etree419_RestoreChecker(dt_self, old_checker, checker,420check_func, checker_check_func,421del_module)422423class _RestoreChecker(object):424def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,425del_module):426self.dt_self = dt_self427self.checker = old_checker428self.checker._temp_call_super_check_output = self.call_super429self.checker._temp_override_self = new_checker430self.check_func = check_func431self.clone_func = clone_func432self.del_module = del_module433self.install_clone()434self.install_dt_self()435def install_clone(self):436if _IS_PYTHON_3:437self.func_code = self.check_func.__code__438self.func_globals = self.check_func.__globals__439self.check_func.__code__ = self.clone_func.__code__440else:441self.func_code = self.check_func.func_code442self.func_globals = self.check_func.func_globals443self.check_func.func_code = self.clone_func.func_code444def uninstall_clone(self):445if _IS_PYTHON_3:446self.check_func.__code__ = self.func_code447else:448self.check_func.func_code = self.func_code449def install_dt_self(self):450self.prev_func = self.dt_self._DocTestRunner__record_outcome451self.dt_self._DocTestRunner__record_outcome = self452def uninstall_dt_self(self):453self.dt_self._DocTestRunner__record_outcome = self.prev_func454def uninstall_module(self):455if self.del_module:456import sys457del sys.modules[self.del_module]458if '.' in self.del_module:459package, module = self.del_module.rsplit('.', 1)460package_mod = sys.modules[package]461delattr(package_mod, module)462def __call__(self, *args, **kw):463self.uninstall_clone()464self.uninstall_dt_self()465del self.checker._temp_override_self466del self.checker._temp_call_super_check_output467result = self.prev_func(*args, **kw)468self.uninstall_module()469return result470def call_super(self, *args, **kw):471self.uninstall_clone()472try:473return self.check_func(*args, **kw)474finally:475self.install_clone()476477def _find_doctest_frame():478import sys479frame = sys._getframe(1)480while frame:481l = frame.f_locals482if 'BOOM' in l:483# Sign of doctest484return frame485frame = frame.f_back486raise LookupError(487"Could not find doctest (only use this function *inside* a doctest)")488489__test__ = {490'basic': '''491>>> temp_install()492>>> print """<xml a="1" b="2">stuff</xml>"""493<xml b="2" a="1">...</xml>494>>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>"""495<xml xmlns="...">496<tag attr="..." />497</xml>498>>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS499<xml>...foo /></xml>500'''}501502if __name__ == '__main__':503import doctest504doctest.testmod()505506507508509