Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/lxml/doctestcompare.py
811 views
1
"""
2
lxml-based doctest output comparison.
3
4
Note: normally, you should just import the `lxml.usedoctest` and
5
`lxml.html.usedoctest` modules from within a doctest, instead of this
6
one::
7
8
>>> import lxml.usedoctest # for XML output
9
10
>>> import lxml.html.usedoctest # for HTML output
11
12
To use this module directly, you must call ``lxml.doctestcompare.install()``,
13
which will cause doctest to use this in all subsequent calls.
14
15
This changes the way output is checked and comparisons are made for
16
XML or HTML-like content.
17
18
XML or HTML content is noticed because the example starts with ``<``
19
(it's HTML if it starts with ``<html``). You can also use the
20
``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
21
22
Some rough wildcard-like things are allowed. Whitespace is generally
23
ignored (except in attributes). In text (attributes and text in the
24
body) you can use ``...`` as a wildcard. In an example it also
25
matches any trailing tags in the element, though it does not match
26
leading tags. You may create a tag ``<any>`` or include an ``any``
27
attribute in the tag. An ``any`` tag matches any tag, while the
28
attribute matches any and all attributes.
29
30
When a match fails, the reformatted example and the actual output are
31
displayed (indented), and a rough diff-like output is given. Anything
32
marked with ``+`` is in the output but wasn't supposed to be, and
33
similarly ``-`` means it's in the example but wasn't in the output.
34
35
You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
36
"""
37
38
from lxml import etree
39
import sys
40
import re
41
import doctest
42
try:
43
from html import escape as html_escape
44
except ImportError:
45
from cgi import escape as html_escape
46
47
# Names exported by ``from lxml.doctestcompare import *``.
__all__ = [
    'PARSE_HTML',
    'PARSE_XML',
    'NOPARSE_MARKUP',
    'LXMLOutputChecker',
    'LHTMLOutputChecker',
    'install',
    'temp_install',
]
49
50
# Python 2 has ``basestring``; on Python 3 fall back to ``(str, bytes)``.
try:
    basestring
except NameError:
    _basestring = (str, bytes)
else:
    _basestring = basestring

_IS_PYTHON_3 = sys.version_info >= (3,)

# Doctest option flags understood by the checkers in this module.
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
PARSE_XML = doctest.register_optionflag('PARSE_XML')
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')

# Snapshot of the stock checker class, taken at import time.
OutputChecker = doctest.OutputChecker
62
63
def strip(v):
    """Return ``v.strip()``, passing ``None`` through unchanged."""
    return None if v is None else v.strip()
68
69
def norm_whitespace(v):
    """Replace every match of ``_norm_whitespace_re`` in *v* with one space."""
    collapsed = _norm_whitespace_re.sub(' ', v)
    return collapsed
71
72
# Shared HTML parser, configured with ``recover=False`` and
# ``remove_blank_text=True``.
_html_parser = etree.HTMLParser(remove_blank_text=True, recover=False)


def html_fromstring(html):
    """Parse *html* with the module's shared HTML parser and return the result."""
    return etree.fromstring(html, parser=_html_parser)
76
77
# Matches two or more consecutive spaces, tabs or newlines (a single
# whitespace character is deliberately not matched).
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')

# We use this to distinguish repr()s (``<Foo object at 0x...>``) from
# elements:
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
80
81
class LXMLOutputChecker(OutputChecker):
    """Doctest output checker that compares XML/HTML output as parsed trees.

    When both the expected and the actual output look like markup they are
    parsed and compared element by element (see ``compare_docs``); any other
    output is delegated to the standard ``OutputChecker``.
    """

    # Tags rendered without content or end tag when formatting HTML.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used for markup that no flag or sniffing claims."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if *got* matches *want*.

        NOTE: ``temp_install()`` copies this function's code object onto
        another function, where it runs with that function's globals (only
        ``etree`` is patched in there).  Keep the body free of closures and
        of any other module-level names from this file.
        """
        # When running as the cloned code, ``self`` is the old checker; it
        # carries the replacement checker and a way to reach the original
        # implementation.
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Choose the parser for this comparison, or None for plain text.

        Precedence: ``NOPARSE_MARKUP`` disables parsing; ``PARSE_HTML`` and
        ``PARSE_XML`` force a parser; otherwise output where both sides
        start with ``<html`` (case-insensitively) is HTML, and other
        markup-looking output uses ``get_default_parser()``.
        """
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            return html_fromstring
        if PARSE_XML & optionflags:
            return etree.XML
        # Both sides are lower-cased so that ``<HTML>`` in the actual output
        # is sniffed the same way as in the example.
        if (want.strip().lower().startswith('<html')
                and got.strip().lower().startswith('<html')):
            return html_fromstring
        if self._looks_like_markup(want) and self._looks_like_markup(got):
            return self.get_default_parser()
        return None

    def _looks_like_markup(self, s):
        """Return True if *s* starts with ``<`` but is not an object repr."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare two elements; return True if they match.

        Tag, text, tail and attributes are compared with the wildcard rules
        of ``tag_compare`` / ``text_compare``.  An ``any`` attribute on the
        expected element skips the attribute comparison.  Children are
        compared pairwise unless the expected element is just ``...``.
        """
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        if 'any' not in want.attrib:
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(
                        want.attrib[key], got.attrib[key], False):
                    return False
        if want.text != '...' or len(want):
            want_children = list(want)
            got_children = list(got)
            for position, want_child in enumerate(want_children):
                if position >= len(got_children):
                    # The example expects more children than were produced.
                    return False
                if not self.compare_docs(want_child, got_children[position]):
                    return False
                if want_child.tail == '...':
                    want_left = len(want_children) - position - 1
                    got_left = len(got_children) - position - 1
                    # A ``...`` after the last expected child matches any
                    # trailing tags in the output.  Stopping when the output
                    # runs out first is kept for backward compatibility.
                    if not want_left or not got_left:
                        return True
            if len(got_children) > len(want_children):
                # Extra children in the output with no ``...`` to absorb them.
                return False
        return True

    def text_compare(self, want, got, strip):
        """Return True if *got* matches *want*, treating ``...`` as a wildcard.

        None is treated as the empty string.  When *strip* is true both
        values are whitespace-normalized and stripped first.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        pattern = '^%s$' % re.escape(want)
        pattern = pattern.replace(r'\.\.\.', '.*')
        return re.search(pattern, got) is not None

    def tag_compare(self, want, got):
        """Return True if the tag *got* satisfies the expected tag *want*.

        ``any`` matches every tag; a ``{...}`` prefix matches any namespace.
        Non-string tags are compared with ``==``.
        """
        if want == 'any':
            return True
        if (not isinstance(want, _basestring)
                or not isinstance(got, _basestring)):
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace: compare local names only.
            return want.split('}')[-1] == got.split('}')[-1]
        return want == got

    def output_difference(self, example, got, optionflags):
        """Return a report describing how *got* differs from ``example.want``.

        Falls back to the standard report when the output is not treated as
        markup or either side fails to parse (parse errors are prepended).
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError as e:
                errors.append('In example: %s' % e)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError as e:
                errors.append('In actual output: %s' % e)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            return value
        html = parser is html_fromstring
        diff_parts = ['Expected:',
                      self.format_doc(want_doc, html, 2),
                      'Got:',
                      self.format_doc(got_doc, html, 2),
                      'Diff:',
                      self.collect_diff(want_doc, got_doc, html, 2)]
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """Return True if *el* should be shown as an HTML empty tag."""
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Return *doc* as indented text, one element per line.

        *prefix* is inserted before the opening tag of *doc* itself (used
        for the ``+``/``-`` markers in diffs); it is not passed to children.
        """
        pad = ' ' * indent
        parts = [pad, prefix, self.format_tag(doc)]
        if not len(doc):
            # No children: tag, text, end tag and tail share one line.
            if not self.html_empty_tag(doc, html):
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        if not self.html_empty_tag(doc, html):
            parts.append('\n')
            if strip(doc.text):
                parts.extend([pad, self.format_text(doc.text), '\n'])
            for el in doc:
                parts.append(self.format_doc(el, html, indent + 2))
            parts.extend([pad, self.format_end_tag(doc), '\n'])
        if strip(doc.tail):
            parts.extend([pad, self.format_text(doc.tail), '\n'])
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return *text* HTML-escaped (quotes included); None becomes ''."""
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, 1)

    def format_tag(self, el):
        """Return the opening tag of *el* with its attributes sorted by name."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        attrs = ['%s="%s"' % (name, self.format_text(value, False))
                 for name, value in sorted(el.attrib.items())]
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Return the closing tag of *el* (``-->`` for comments)."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Return an indented, diff-like rendering of *want* versus *got*.

        Children are paired by position; unpaired children of the output
        are marked ``+`` and unpaired children of the example ``-``.
        """
        pad = ' ' * indent
        parts = [pad, self.collect_diff_tag(want, got)]
        if not len(want) and not len(got):
            # Neither side has children: keep everything on one line.
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.extend(
                [pad, self.collect_diff_text(want.text, got.text), '\n'])
        want_children = list(want)
        got_children = list(got)
        for position in range(max(len(want_children), len(got_children))):
            if position >= len(want_children):
                parts.append(self.format_doc(
                    got_children[position], html, indent + 2, '+'))
            elif position >= len(got_children):
                parts.append(self.format_doc(
                    want_children[position], html, indent + 2, '-'))
            else:
                parts.append(self.collect_diff(
                    want_children[position], got_children[position],
                    html, indent + 2))
        parts.extend([pad, self.collect_diff_end_tag(want, got), '\n'])
        if strip(want.tail) or strip(got.tail):
            parts.extend(
                [pad, self.collect_diff_text(want.tail, got.tail), '\n'])
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Return the opening tag for the diff, annotating mismatches.

        Attributes only in the output are prefixed ``+``, attributes only in
        the example ``-``; differing values are shown as ``want (got: x)``.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        # An ``any`` tag or attribute in the example accepts all attributes.
        match_any = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name not in want.attrib and not match_any:
                attrs.append(
                    '+%s="%s"' % (name, self.format_text(value, False)))
            else:
                if name in want.attrib:
                    text = self.collect_diff_text(
                        want.attrib[name], value, False)
                else:
                    text = self.format_text(value, False)
                attrs.append('%s="%s"' % (name, text))
        if not match_any:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append(
                    '-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            return '<%s %s>' % (tag, ' '.join(attrs))
        return '<%s>' % tag

    def collect_diff_end_tag(self, want, got):
        """Return the closing tag for the diff, annotating a tag mismatch."""
        # Use tag_compare (not ``!=``) so that wildcard tags accepted by
        # collect_diff_tag are not reported as mismatches here.
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Return *got* escaped, or ``want (got: got)`` if the texts differ."""
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)
368
369
class LHTMLOutputChecker(LXMLOutputChecker):
    """``LXMLOutputChecker`` variant whose default parser is the HTML parser."""

    def get_default_parser(self):
        """Return ``html_fromstring`` as the default parser."""
        default_parser = html_fromstring
        return default_parser
372
373
def install(html=False):
    """
    Install doctestcompare for all future doctests.

    Replaces ``doctest.OutputChecker``.  If html is true, then by default
    the HTML parser will be used; otherwise the XML parser is used.
    """
    doctest.OutputChecker = LHTMLOutputChecker if html else LXMLOutputChecker
384
385
def temp_install(html=False, del_module=None):
    """
    Use this *inside* a doctest to enable this checker for this
    doctest only.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.  *del_module* is handed to
    ``_RestoreChecker``, which removes that module from ``sys.modules``
    once the checker has been restored.
    """
    checker_class = LHTMLOutputChecker if html else LXMLOutputChecker
    frame = _find_doctest_frame()
    dt_self = frame.f_locals['self']
    checker = checker_class()
    old_checker = dt_self._checker
    dt_self._checker = checker
    # The unfortunate thing is that there is a local variable 'check'
    # in the function that runs the doctests, that is a bound method
    # into the output checker.  We have to update that.  We can't
    # modify the frame, so we have to modify the object in place.  The
    # only way to do this is to actually change the func_code
    # attribute of the method.  We change it, and then wait for
    # __record_outcome to be run, which signals the end of the __run
    # method, at which point we restore the previous check_output
    # implementation.
    func_attr = '__func__' if _IS_PYTHON_3 else 'im_func'
    check_func = getattr(frame.f_locals['check'], func_attr)
    checker_check_func = getattr(checker.check_output, func_attr)
    # Because we can't patch up func_globals, this is the only global
    # in check_output that we care about:
    doctest.etree = etree
    # The instance hooks itself into dt_self, so no reference is kept here.
    _RestoreChecker(
        dt_self, old_checker, checker,
        check_func, checker_check_func,
        del_module)
423
424
class _RestoreChecker(object):
    """Temporarily swap the code of a check function and undo it later.

    On construction the code object of *check_func* is replaced with that
    of *clone_func* and the instance installs itself as the runner's
    ``__record_outcome``.  When the runner calls it, everything is restored
    and the original ``__record_outcome`` is invoked.
    """

    def __init__(self, dt_self, old_checker, new_checker, check_func,
                 clone_func, del_module):
        self.dt_self = dt_self
        self.checker = old_checker
        # The cloned code runs with the old checker as ``self``; these two
        # attributes let it find the new checker and the original code.
        self.checker._temp_call_super_check_output = self.call_super
        self.checker._temp_override_self = new_checker
        self.check_func = check_func
        self.clone_func = clone_func
        self.del_module = del_module
        self.install_clone()
        self.install_dt_self()

    def install_clone(self):
        """Save the check function's code/globals, then install the clone's code."""
        code_attr = '__code__' if _IS_PYTHON_3 else 'func_code'
        globals_attr = '__globals__' if _IS_PYTHON_3 else 'func_globals'
        self.func_code = getattr(self.check_func, code_attr)
        self.func_globals = getattr(self.check_func, globals_attr)
        setattr(self.check_func, code_attr,
                getattr(self.clone_func, code_attr))

    def uninstall_clone(self):
        """Put the saved code object back on the check function."""
        code_attr = '__code__' if _IS_PYTHON_3 else 'func_code'
        setattr(self.check_func, code_attr, self.func_code)

    def install_dt_self(self):
        """Replace the runner's ``__record_outcome`` with this instance."""
        self.prev_func = self.dt_self._DocTestRunner__record_outcome
        self.dt_self._DocTestRunner__record_outcome = self

    def uninstall_dt_self(self):
        """Restore the runner's original ``__record_outcome``."""
        self.dt_self._DocTestRunner__record_outcome = self.prev_func

    def uninstall_module(self):
        """Drop ``del_module`` from ``sys.modules`` and from its parent package."""
        target = self.del_module
        if not target:
            return
        del sys.modules[target]
        if '.' not in target:
            return
        package, module = target.rsplit('.', 1)
        delattr(sys.modules[package], module)

    def __call__(self, *args, **kw):
        """Restore everything, then delegate to the original ``__record_outcome``."""
        self.uninstall_clone()
        self.uninstall_dt_self()
        del self.checker._temp_override_self
        del self.checker._temp_call_super_check_output
        outcome = self.prev_func(*args, **kw)
        self.uninstall_module()
        return outcome

    def call_super(self, *args, **kw):
        """Call the check function with its original code, then re-install the clone."""
        self.uninstall_clone()
        try:
            return self.check_func(*args, **kw)
        finally:
            self.install_clone()
477
478
def _find_doctest_frame():
    """Return the nearest calling frame that has a local named ``BOOM``.

    Raises LookupError if no frame on the call stack has such a local.
    """
    candidate = sys._getframe(1)
    while candidate is not None:
        # Sign of doctest
        if 'BOOM' in candidate.f_locals:
            return candidate
        candidate = candidate.f_back
    raise LookupError(
        "Could not find doctest (only use this function *inside* a doctest)")
489
490
__test__ = {
491
'basic': '''
492
>>> temp_install()
493
>>> print """<xml a="1" b="2">stuff</xml>"""
494
<xml b="2" a="1">...</xml>
495
>>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>"""
496
<xml xmlns="...">
497
<tag attr="..." />
498
</xml>
499
>>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
500
<xml>...foo /></xml>
501
'''}
502
503
if __name__ == '__main__':
    # Script entry point: run the module's doctests (``doctest`` is already
    # imported at the top of the file).
    doctest.testmod()
506
507
508
509