CoCalc -- test

GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/bs4/tests/test_lxml.py
⁸¹¹ views
1
"""Tests to ensure that the lxml tree builder generates good trees."""
2

3
import re
4
import warnings
5

6
# lxml is an optional dependency. Record whether it can be imported and,
# if so, which version is installed, so the tests below can be skipped
# when it is missing or too old.
try:
    import lxml.etree
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError:
    LXML_PRESENT = False
    # Sentinel that compares lower than any real lxml version tuple.
    LXML_VERSION = (0,)
13

14
if LXML_PRESENT:
15
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
16

17
from bs4 import (
18
    BeautifulSoup,
19
    BeautifulStoneSoup,
20
    )
21
from bs4.element import Comment, Doctype, SoupStrainer
22
from bs4.testing import skipIf
23
from bs4.tests import test_htmlparser
24
from bs4.testing import (
25
    HTMLTreeBuilderSmokeTest,
26
    XMLTreeBuilderSmokeTest,
27
    SoupTest,
28
    skipIf,
29
)
30

31
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Smoke tests for the lxml HTML tree builder.

    See ``HTMLTreeBuilderSmokeTest`` for the inherited tests. The methods
    defined here add lxml-specific checks; the whole class is skipped when
    lxml cannot be imported.
    """

    @property
    def default_builder(self):
        # The tree builder class under test.
        return LXMLTreeBuilder

    def test_out_of_range_entity(self):
        # Numeric character references far outside the valid range
        # (decimal or hexadecimal) are expected to vanish from the output.
        expected = "<p>foobar</p>"
        for markup in (
                "<p>foo&#10000000000000;bar</p>",
                "<p>foo&#x10000000000000;bar</p>",
                "<p>foo&#1000000000;bar</p>",
        ):
            self.assertSoupEquals(markup, expected)

    def test_entities_in_foreign_document_encoding(self):
        # We can't implement this case correctly because by the time we
        # hear about markup like "&#147;", it's been (incorrectly) converted into
        # a string like u'\x93'
        #
        # NOTE(review): presumably this deliberately replaces an inherited
        # test of the same name with a no-op -- confirm against the base
        # class.
        pass

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.
    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2, 3, 5, 0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as caught:
            # Record every warning regardless of the filters in effect
            # when the suite is run (e.g. ``-W ignore``); otherwise the
            # deprecation warning could be suppressed and never recorded.
            warnings.simplefilter("always")
            soup = BeautifulStoneSoup("<b />")
        self.assertEqual("<b/>", str(soup.b))

        # Look through everything that was recorded rather than indexing
        # the first entry: an empty list then produces a readable assertion
        # failure instead of an IndexError, and an unrelated warning
        # emitted first cannot hide the one we care about.
        messages = [str(warning.message) for warning in caught]
        self.assertTrue(
            any("BeautifulStoneSoup class is deprecated" in message
                for message in messages),
            "Expected a BeautifulStoneSoup deprecation warning, got: %r"
            % (messages,))

    def test_tracking_line_numbers(self):
        # The lxml TreeBuilder cannot keep track of line numbers from
        # the original markup. Even if you ask for line numbers, we
        # don't have 'em.
        #
        # This means that if you have a tag like <sourceline> or
        # <sourcepos>, attribute access will find it rather than
        # giving you a numeric answer.
        soup = self.soup(
            "\n   <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
            store_line_numbers=True
        )
        self.assertEqual("sourceline", soup.p.sourceline.name)
        self.assertEqual("sourcepos", soup.p.sourcepos.name)
88
        
89
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """Smoke tests for the lxml XML tree builder.

    See ``XMLTreeBuilderSmokeTest`` for the inherited tests.
    """

    @property
    def default_builder(self):
        # The tree builder class under test.
        return LXMLTreeBuilderForXML

    def test_namespace_indexing(self):
        # Only prefixed namespaces should be tracked. We can hold just one
        # un-prefixed namespace, and soupsieve would treat it as the
        # default namespace -- i.e. the one assumed whenever a selector
        # names no namespace -- which may be confusing in some situations.
        markup = (
            '<?xml version="1.1"?>\n'
            '<root>'
            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
            '<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</tag>'
            '</root>'
        )
        expected_namespaces = {
            'xml': 'http://www.w3.org/XML/1998/namespace',
            'prefix': 'http://prefixed-namespace.com',
        }

        parsed = self.soup(markup)

        self.assertEqual(parsed._namespaces, expected_namespaces)
116

117
Product

Resources

Company