CoCalc -- doctestcompare.py

GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/lxml/doctestcompare.py
⁸¹¹ views
1
"""
2
lxml-based doctest output comparison.
3

4
Note: normally, you should just import the `lxml.usedoctest` and
5
`lxml.html.usedoctest` modules from within a doctest, instead of this
6
one::
7

8
    >>> import lxml.usedoctest # for XML output
9

10
    >>> import lxml.html.usedoctest # for HTML output
11

12
To use this module directly, you must call ``lxml.doctestcompare.install()``,
13
which will cause doctest to use this in all subsequent calls.
14

15
This changes the way output is checked and comparisons are made for
16
XML or HTML-like content.
17

18
XML or HTML content is noticed because the example starts with ``<``
19
(it's HTML if it starts with ``<html``).  You can also use the
20
``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
21

22
Some rough wildcard-like things are allowed.  Whitespace is generally
23
ignored (except in attributes).  In text (attributes and text in the
24
body) you can use ``...`` as a wildcard.  In an example it also
25
matches any trailing tags in the element, though it does not match
26
leading tags.  You may create a tag ``<any>`` or include an ``any``
27
attribute in the tag.  An ``any`` tag matches any tag, while the
28
attribute matches any and all attributes.
29

30
When a match fails, the reformatted example and the actual output are
31
displayed (indented), and a rough diff-like output is given.  Anything
32
marked with ``+`` is in the output but wasn't supposed to be, and
33
similarly ``-`` means it's in the example but wasn't in the output.
34

35
You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
36
"""
37

38
from lxml import etree
39
import sys
40
import re
41
import doctest
42
try:
43
    from html import escape as html_escape
44
except ImportError:
45
    from cgi import escape as html_escape
46

47
# Names exported by ``from lxml.doctestcompare import *``.
__all__ = [
    'PARSE_HTML',
    'PARSE_XML',
    'NOPARSE_MARKUP',
    'LXMLOutputChecker',
    'LHTMLOutputChecker',
    'install',
    'temp_install',
]
49

50
# ``basestring`` only exists on Python 2; fall back to the Python 3 string
# types when the name is missing.
try:
    _basestring = basestring
except NameError:
    _basestring = (str, bytes)

# True when running on Python 3 or later.
_IS_PYTHON_3 = sys.version_info >= (3,)

# Doctest option flags understood by the checkers in this module.
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
PARSE_XML = doctest.register_optionflag('PARSE_XML')
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')

# Keep a reference to the stock checker class: install() rebinds
# ``doctest.OutputChecker`` itself.
OutputChecker = doctest.OutputChecker
62

63
def strip(v):
    """Return ``v.strip()``, passing ``None`` through unchanged."""
    return None if v is None else v.strip()
68

69
def norm_whitespace(v):
    """Replace every match of ``_norm_whitespace_re`` in *v* with one space."""
    collapsed = _norm_whitespace_re.sub(' ', v)
    return collapsed
71

72
# Module-level HTML parser shared by html_fromstring(); it is created with
# recover=False and remove_blank_text=True.
_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)
73

74
def html_fromstring(html):
    """Parse the string *html* with the module's shared ``_html_parser``."""
    return etree.fromstring(html, parser=_html_parser)
76

77
# We use this to distinguish repr()s from elements:
78
# (the pattern matches text such as ``<Element foo at `` or ``<pkg.Cls object ``)
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
79
# Matches a run of two or more spaces, tabs or newlines; used by
# norm_whitespace().
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
80

81
class LXMLOutputChecker(OutputChecker):
    """Doctest output checker that compares XML/HTML output as parsed trees.

    When both the expected and the actual output look like markup (or a
    ``PARSE_HTML`` / ``PARSE_XML`` option flag forces it), the two strings
    are parsed and compared element by element; otherwise the stock
    ``doctest.OutputChecker`` behaviour is used.  ``...`` acts as a wildcard
    in text and attribute values, an ``<any>`` tag matches any tag, and an
    ``any`` attribute disables attribute comparison for that element.
    """

    # Tags rendered without content or an end tag when formatting HTML.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used when the markup type is not forced."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if *got* matches *want*.

        Falls back to the plain doctest comparison when no parser applies.
        A syntax error in either document counts as a mismatch.

        NOTE: temp_install() copies this method's code object into
        doctest's own ``check_output`` function, where it runs with the
        ``doctest`` module's globals.  The only globals this body may use
        are therefore ``OutputChecker`` and ``etree`` (which temp_install()
        injects into ``doctest``); do not reference other module-level
        names or closures here.
        """
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            # Running as the cloned code on the original checker: delegate
            # to the temporary checker, and reach the original
            # implementation through the stored callback.
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Choose the parser for this comparison, or None for plain text.

        Priority: ``NOPARSE_MARKUP`` (no parsing), ``PARSE_HTML``,
        ``PARSE_XML``, both strings starting with ``<html`` (checked
        case-insensitively on both sides), and finally the default parser
        if both strings look like markup.
        """
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            return html_fromstring
        if PARSE_XML & optionflags:
            return etree.XML
        # Both sides are lower-cased so that ``<HTML>`` output is detected
        # the same way as an ``<HTML>`` example.
        if (want.strip().lower().startswith('<html')
                and got.strip().lower().startswith('<html')):
            return html_fromstring
        if self._looks_like_markup(want) and self._looks_like_markup(got):
            return self.get_default_parser()
        return None

    def _looks_like_markup(self, s):
        """Return True if *s* starts with ``<`` and is not an object repr."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare element *got* against the pattern *want*."""
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        if 'any' not in want.attrib:
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(want.attrib[key], got.attrib[key], False):
                    return False
        # A childless element whose text is exactly ``...`` matches any
        # content, so its children are not compared.
        if want.text != '...' or len(want):
            want_children = list(want)
            got_children = list(got)
            last_got = len(got_children) - 1
            for index, (want_child, got_child) in enumerate(
                    zip(want_children, got_children)):
                if not self.compare_docs(want_child, got_child):
                    return False
                # NOTE(review): this stops once the *output's* children are
                # exhausted and the example child's tail is exactly '...';
                # the module docstring describes '...' as matching trailing
                # tags in the output instead.  Behaviour is kept as-is;
                # confirm the intent before changing it.
                if index == last_got and want_child.tail == '...':
                    return True
            if len(want_children) != len(got_children):
                return False
        return True

    def text_compare(self, want, got, strip):
        """Return True if text *got* matches *want*, honouring ``...``.

        ``None`` is treated as the empty string.  If *strip* is true, both
        strings are whitespace-normalised and stripped first.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        pattern = '^%s$' % re.escape(want)
        # Turn the escaped literal ``...`` back into a wildcard.
        pattern = pattern.replace(r'\.\.\.', '.*')
        return bool(re.search(pattern, got))

    def tag_compare(self, want, got):
        """Return True if tag *got* matches *want*.

        ``any`` matches every tag and a ``{...}`` prefix matches any
        namespace.  Non-string tags are compared with ``==``.
        """
        if want == 'any':
            return True
        if (not isinstance(want, _basestring)
                or not isinstance(got, _basestring)):
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace
            return want.split('}')[-1] == got.split('}')[-1]
        return want == got

    def output_difference(self, example, got, optionflags):
        """Return a description of how *got* differs from ``example.want``.

        When both documents parse, the result shows the reformatted
        expected and actual documents followed by a rough diff.  Otherwise
        the standard doctest difference is returned, preceded by any
        parse error messages.
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError as exc:
                errors.append('In example: %s' % exc)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError as exc:
                errors.append('In actual output: %s' % exc)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            return value
        html = parser is html_fromstring
        diff_parts = ['Expected:',
                      self.format_doc(want_doc, html, 2),
                      'Got:',
                      self.format_doc(got_doc, html, 2),
                      'Diff:',
                      self.collect_diff(want_doc, got_doc, html, 2)]
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """Return True if *el* should be shown as an empty HTML tag."""
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Return element *doc* pretty-printed with *indent* leading spaces.

        *prefix* is inserted before the start tag of *doc* itself; it is
        not passed on to child elements.
        """
        pad = ' ' * indent
        parts = [pad, prefix, self.format_tag(doc)]
        has_body = not self.html_empty_tag(doc, html)
        if not len(doc):
            # No children: the whole element goes on one line.
            if has_body:
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        if has_body:
            parts.append('\n')
            if strip(doc.text):
                parts.extend((pad, self.format_text(doc.text), '\n'))
            for el in doc:
                parts.append(self.format_doc(el, html, indent + 2))
            parts.extend((pad, self.format_end_tag(doc), '\n'))
        if strip(doc.tail):
            parts.extend((pad, self.format_text(doc.tail), '\n'))
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return *text* HTML-escaped (and stripped if *strip* is true)."""
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, 1)

    def format_tag(self, el):
        """Return the start tag of *el* with its attributes sorted by name."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        attrs = ['%s="%s"' % (name, self.format_text(value, False))
                 for name, value in sorted(el.attrib.items())]
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Return the end tag (or comment terminator) for *el*."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Return a rough diff of element *got* against pattern *want*.

        Children are paired up in order.  Extra children in *got* are
        shown with a ``+`` prefix and children missing from *got* with a
        ``-`` prefix.
        """
        pad = ' ' * indent
        parts = [pad, self.collect_diff_tag(want, got)]
        if not len(want) and not len(got):
            # Neither side has children: emit a single line.
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.extend(
                (pad, self.collect_diff_text(want.text, got.text), '\n'))
        want_children = list(want)
        got_children = list(got)
        paired = min(len(want_children), len(got_children))
        for want_child, got_child in zip(want_children, got_children):
            parts.append(
                self.collect_diff(want_child, got_child, html, indent + 2))
        for extra in got_children[paired:]:
            parts.append(self.format_doc(extra, html, indent + 2, '+'))
        for missing in want_children[paired:]:
            parts.append(self.format_doc(missing, html, indent + 2, '-'))
        parts.extend((pad, self.collect_diff_end_tag(want, got), '\n'))
        if strip(want.tail) or strip(got.tail):
            parts.extend(
                (pad, self.collect_diff_text(want.tail, got.tail), '\n'))
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Return the start tag of the diff, marking attribute differences.

        Attributes only in *got* are prefixed with ``+`` and attributes
        only in *want* with ``-``, unless *want* is an ``<any>`` tag or
        carries an ``any`` attribute.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        ignore_attrs = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name in want.attrib:
                text = self.collect_diff_text(want.attrib[name], value, False)
                attrs.append('%s="%s"' % (name, text))
            elif ignore_attrs:
                attrs.append('%s="%s"' % (name, self.format_text(value, False)))
            else:
                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
        if not ignore_attrs:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            return '<%s %s>' % (tag, ' '.join(attrs))
        return '<%s>' % tag

    def collect_diff_end_tag(self, want, got):
        """Return the end tag of the diff, marking a tag mismatch.

        Uses tag_compare() like collect_diff_tag() does, so wildcard tags
        (``any``, ``{...}name``) are not reported as mismatches here.
        """
        if self.tag_compare(want.tag, got.tag):
            tag = got.tag
        else:
            tag = '%s (got: %s)' % (want.tag, got.tag)
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Return *got* if it matches *want*, else ``want (got: got)``."""
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)
368

369
class LHTMLOutputChecker(LXMLOutputChecker):
    """Variant of LXMLOutputChecker whose default parser is the HTML one."""

    def get_default_parser(self):
        """Return ``html_fromstring`` as the default parser."""
        default_parser = html_fromstring
        return default_parser
372
    
373
def install(html=False):
    """
    Install doctestcompare for all future doctests.

    This rebinds ``doctest.OutputChecker``.  If html is true, then by
    default the HTML parser will be used; otherwise the XML parser is used.
    """
    doctest.OutputChecker = LHTMLOutputChecker if html else LXMLOutputChecker
384

385
def temp_install(html=False, del_module=None):
    """
    Use this *inside* a doctest to enable this checker for this
    doctest only.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.

    *del_module* is handed to ``_RestoreChecker``, which removes the module
    of that name from ``sys.modules`` when the checker is restored.
    """
    Checker = LHTMLOutputChecker if html else LXMLOutputChecker
    frame = _find_doctest_frame()
    dt_self = frame.f_locals['self']
    checker = Checker()
    old_checker = dt_self._checker
    dt_self._checker = checker
    # The function that runs the doctests holds a local variable 'check'
    # that is a bound method of the output checker.  The frame itself
    # cannot be modified, so the function object behind that method is
    # patched in place by swapping its code object.  The original code is
    # restored when __record_outcome runs, which signals the end of the
    # __run method.
    func_attr = '__func__' if _IS_PYTHON_3 else 'im_func'
    check_func = getattr(frame.f_locals['check'], func_attr)
    checker_check_func = getattr(checker.check_output, func_attr)
    # Because we can't patch up func_globals, this is the only global
    # in check_output that we care about:
    doctest.etree = etree
    _RestoreChecker(dt_self, old_checker, checker,
                    check_func, checker_check_func,
                    del_module)
423

424
class _RestoreChecker(object):
    """Installs the temporary checker and undoes it on the next outcome.

    On construction the code of *check_func* is replaced by the code of
    *clone_func*, and the instance takes the place of the runner's
    ``__record_outcome`` method.  When the runner calls it, everything is
    restored and the call is forwarded to the original method.
    """

    def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
                 del_module):
        self.dt_self = dt_self
        self.checker = old_checker
        # The cloned check_output code looks these two attributes up on the
        # original checker.
        old_checker._temp_call_super_check_output = self.call_super
        old_checker._temp_override_self = new_checker
        self.check_func = check_func
        self.clone_func = clone_func
        self.del_module = del_module
        self.install_clone()
        self.install_dt_self()

    def install_clone(self):
        """Save check_func's code and globals, then swap in the clone's code."""
        if _IS_PYTHON_3:
            code_attr, globals_attr = '__code__', '__globals__'
        else:
            code_attr, globals_attr = 'func_code', 'func_globals'
        self.func_code = getattr(self.check_func, code_attr)
        self.func_globals = getattr(self.check_func, globals_attr)
        setattr(self.check_func, code_attr, getattr(self.clone_func, code_attr))

    def uninstall_clone(self):
        """Put the saved code object back on check_func."""
        code_attr = '__code__' if _IS_PYTHON_3 else 'func_code'
        setattr(self.check_func, code_attr, self.func_code)

    def install_dt_self(self):
        """Replace the runner's __record_outcome with this instance."""
        self.prev_func = self.dt_self._DocTestRunner__record_outcome
        self.dt_self._DocTestRunner__record_outcome = self

    def uninstall_dt_self(self):
        """Restore the runner's original __record_outcome."""
        self.dt_self._DocTestRunner__record_outcome = self.prev_func

    def uninstall_module(self):
        """Drop del_module from sys.modules and from its parent package."""
        name = self.del_module
        if not name:
            return
        import sys
        del sys.modules[name]
        package, dot, module = name.rpartition('.')
        if dot:
            delattr(sys.modules[package], module)

    def __call__(self, *args, **kw):
        """Undo every patch, then forward to the original __record_outcome."""
        self.uninstall_clone()
        self.uninstall_dt_self()
        del self.checker._temp_override_self
        del self.checker._temp_call_super_check_output
        outcome = self.prev_func(*args, **kw)
        self.uninstall_module()
        return outcome

    def call_super(self, *args, **kw):
        """Call the original check_output code, then re-install the clone."""
        self.uninstall_clone()
        try:
            return self.check_func(*args, **kw)
        finally:
            self.install_clone()
477
            
478
def _find_doctest_frame():
    """Return the nearest calling frame that has a local named ``BOOM``.

    Such a local is taken as the sign of the doctest runner.  Raises
    ``LookupError`` if no frame on the call stack has one.
    """
    import sys
    candidate = sys._getframe(1)
    while candidate is not None:
        if 'BOOM' in candidate.f_locals:
            # Sign of doctest
            return candidate
        candidate = candidate.f_back
    raise LookupError(
        "Could not find doctest (only use this function *inside* a doctest)")
489
    
490
__test__ = {
491
    'basic': '''
492
    >>> temp_install()
493
    >>> print """<xml a="1" b="2">stuff</xml>"""
494
    <xml b="2" a="1">...</xml>
495
    >>> print """<xml xmlns="http://example.com"><tag   attr="bar"   /></xml>"""
496
    <xml xmlns="...">
497
      <tag attr="..." />
498
    </xml>
499
    >>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
500
    <xml>...foo /></xml>
501
    '''}
502

503
if __name__ == '__main__':
504
    import doctest
505
    doctest.testmod()
506
    
507
    
508

509
Product

Resources

Company