Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/bs4/tests/test_lxml.py
811 views
1
"""Tests to ensure that the lxml tree builder generates good trees."""
2
3
import re
4
import warnings
5
6
# lxml is an optional dependency. Record whether it can be imported, and
# which version it is, so the tests below can be skipped when it is
# missing or too old.
try:
    import lxml.etree
except ImportError:
    LXML_PRESENT = False
    # Placeholder version; as a tuple it compares lower than the
    # (2, 3, 5, 0) minimum checked by the doctype test below.
    LXML_VERSION = (0,)
else:
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
13
14
if LXML_PRESENT:
15
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
16
17
from bs4 import (
18
BeautifulSoup,
19
BeautifulStoneSoup,
20
)
21
from bs4.element import Comment, Doctype, SoupStrainer
22
from bs4.testing import skipIf
23
from bs4.tests import test_htmlparser
24
from bs4.testing import (
25
HTMLTreeBuilderSmokeTest,
26
XMLTreeBuilderSmokeTest,
27
SoupTest,
28
skipIf,
29
)
30
31
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Smoke tests for lxml's HTML tree builder.

    See ``HTMLTreeBuilderSmokeTest`` for the inherited test cases. The
    methods defined here cover behavior specific to the lxml builder.
    The whole class is skipped when lxml cannot be imported.
    """

    @property
    def default_builder(self):
        """Return the tree builder class these tests exercise."""
        return LXMLTreeBuilder

    def test_out_of_range_entity(self):
        """Numeric character references that are out of range are dropped."""
        out_of_range_markup = (
            "<p>foo&#10000000000000;bar</p>",
            "<p>foo&#x10000000000000;bar</p>",
            "<p>foo&#1000000000;bar</p>",
        )
        for markup in out_of_range_markup:
            self.assertSoupEquals(markup, "<p>foobar</p>")

    def test_entities_in_foreign_document_encoding(self):
        """Deliberately a no-op for lxml; see the comment below."""
        # We can't implement this case correctly because by the time we
        # hear about markup like "&#147;", it's been (incorrectly) converted into
        # a string like u'\x93'
        pass

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.
    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2, 3, 5, 0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        """An empty ``<!DOCTYPE>`` parses to a doctype with empty content."""
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        """The deprecated ``BeautifulStoneSoup`` parses as XML and warns."""
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            # Force every warning to be recorded. Otherwise the active
            # warning filters may suppress it, leaving ``w`` empty and
            # turning the check below into an IndexError instead of a
            # clean test failure.
            warnings.simplefilter("always")
            soup = BeautifulStoneSoup("<b />")
        self.assertEqual("<b/>", str(soup.b))
        messages = [str(warning.message) for warning in w]
        self.assertTrue(
            any("BeautifulStoneSoup class is deprecated" in message
                for message in messages),
            "No deprecation warning recorded; got: %r" % (messages,))

    def test_tracking_line_numbers(self):
        """``sourceline``/``sourcepos`` resolve to child tags, not numbers."""
        # The lxml TreeBuilder cannot keep track of line numbers from
        # the original markup. Even if you ask for line numbers, we
        # don't have 'em.
        #
        # This means that if you have a tag like <sourceline> or
        # <sourcepos>, attribute access will find it rather than
        # giving you a numeric answer.
        soup = self.soup(
            "\n   <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
            store_line_numbers=True
        )
        self.assertEqual("sourceline", soup.p.sourceline.name)
        self.assertEqual("sourcepos", soup.p.sourcepos.name)

88
89
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """Smoke tests for lxml's XML tree builder.

    See ``XMLTreeBuilderSmokeTest`` for the inherited test cases. The
    whole class is skipped when lxml cannot be imported.
    """

    @property
    def default_builder(self):
        """Return the tree builder class these tests exercise."""
        return LXMLTreeBuilderForXML

    def test_namespace_indexing(self):
        """Only prefixed namespaces are recorded in ``soup._namespaces``."""
        # We should not track un-prefixed namespaces as we can only hold one
        # and it will be recognized as the default namespace by soupsieve,
        # which may be confusing in some situations. When no namespace is provided
        # for a selector, the default namespace (if defined) is assumed.
        markup = (
            '<?xml version="1.1"?>\n'
            '<root>'
            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
            '<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</tag>'
            '</root>'
        )
        expected_namespaces = {
            'xml': 'http://www.w3.org/XML/1998/namespace',
            'prefix': 'http://prefixed-namespace.com',
        }

        document = self.soup(markup)

        self.assertEqual(document._namespaces, expected_namespaces)

116
117