Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/lxml/html/_html5builder.py
811 views
"""
Legacy module - don't use in new code!

html5lib now has its own proper implementation.

This module implements a tree builder for html5lib that generates lxml
html element trees. This module uses camelCase as it follows the
html5lib style guide.
"""
10
11
from html5lib.treebuilders import _base, etree as etree_builders
12
from lxml import html, etree
13
14
15
class DocumentType(object):
    """Plain record holding the three parts of a doctype declaration."""

    def __init__(self, name, publicId, systemId):
        # The values are stored exactly as given; nothing is validated
        # or normalised here.
        self.name, self.publicId, self.systemId = name, publicId, systemId
21
22
class Document(object):
    """Document/fragment node that wraps an element tree."""

    def __init__(self):
        # The tree is attached later by whoever creates the root element.
        self._elementTree = None
        self.childNodes = list()

    def appendChild(self, element):
        """Add the wrapped ``element._element`` as a sibling after the root."""
        root = self._elementTree.getroot()
        root.addnext(element._element)
30
31
32
class TreeBuilder(_base.TreeBuilder):
    """html5lib tree builder that produces lxml HTML element trees.

    The doctype and any comments seen before the root element are only
    recorded at first; insertRoot() later builds the actual lxml document
    from them.
    """
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document

    def __init__(self, *args, **kwargs):
        """Pick the element/comment wrapper classes, then run the base init.

        Elements use the wrapper generated for ``lxml.html``, comments the
        one generated for ``lxml.etree``.
        """
        html_builder = etree_builders.getETreeModule(html, fullTree=False)
        etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
        self.elementClass = html_builder.Element
        self.commentClass = etree_builder.Comment
        _base.TreeBuilder.__init__(self, *args, **kwargs)

    def reset(self):
        """Reset the base builder and clear the pre-root bookkeeping."""
        _base.TreeBuilder.reset(self)
        self.rootInserted = False
        self.initialComments = []
        self.doctype = None

    def getDocument(self):
        """Return the element tree stored on the current document."""
        return self.document._elementTree

    def getFragment(self):
        """Return the content of the first open element as a flat list.

        The list holds the element's leading text (if any), its child
        elements, and its tail text (if any), in that order.
        """
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        # list(element) replaces the deprecated element.getchildren().
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, name, publicId, systemId):
        """Remember the doctype; it is serialised later by insertRoot()."""
        self.doctype = self.doctypeClass(name, publicId, systemId)

    def insertComment(self, data, parent=None):
        """Insert a comment, buffering it if no root element exists yet."""
        if not self.rootInserted:
            self.initialComments.append(data)
        else:
            _base.TreeBuilder.insertComment(self, data, parent)

    def insertRoot(self, name):
        """Create the lxml document and register its root element.

        A minimal ``<html></html>`` document, preceded by the recorded
        doctype if it has a name, is parsed with ``html.fromstring``.
        The buffered comments are then added before the root, and the root
        is registered in ``document.childNodes`` and ``openElements``.
        """
        buf = []
        doctype = self.doctype
        if doctype and doctype.name:
            buf.append('<!DOCTYPE %s' % doctype.name)
            if doctype.publicId is not None or doctype.systemId is not None:
                # Either identifier may be None on its own; render it as an
                # empty string so the literal text "None" never ends up in
                # the doctype declaration.
                buf.append(' PUBLIC "%s" "%s"' % (doctype.publicId or '',
                                                  doctype.systemId or ''))
            buf.append('>')
        buf.append('<html></html>')
        root = html.fromstring(''.join(buf))

        # Append the initial comments:
        for comment in self.initialComments:
            root.addprevious(etree.Comment(comment))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name)
        root_element._element = root
        self.document.childNodes.append(root_element)
        self.openElements.append(root_element)

        self.rootInserted = True
101
102