Path: blob/master/venv/Lib/site-packages/lxml/html/_html5builder.py
811 views
"""
Legacy module - don't use in new code!

html5lib now has its own proper implementation.

This module implements a tree builder for html5lib that generates lxml
html element trees.  This module uses camelCase as it follows the
html5lib style guide.
"""

from html5lib.treebuilders import _base, etree as etree_builders
from lxml import html, etree


class DocumentType(object):
    """Plain record holding the three parts of a doctype declaration."""

    def __init__(self, name, publicId, systemId):
        """Store the doctype *name*, *publicId* and *systemId* unchanged."""
        self.name = name
        self.publicId = publicId
        self.systemId = systemId


class Document(object):
    """Document node that wraps an lxml element tree.

    ``_elementTree`` starts out as ``None``; ``TreeBuilder.insertRoot``
    assigns the real tree once the root element has been created.
    """

    def __init__(self):
        self._elementTree = None
        self.childNodes = []

    def appendChild(self, element):
        """Attach *element* as the next sibling of the tree's root element.

        *element* is expected to carry the underlying lxml node in its
        ``_element`` attribute.
        """
        self._elementTree.getroot().addnext(element._element)


class TreeBuilder(_base.TreeBuilder):
    """html5lib tree builder that produces lxml.html element trees."""

    documentClass = Document
    doctypeClass = DocumentType
    # The two classes below are filled in per instance by ``__init__``.
    elementClass = None
    commentClass = None
    fragmentClass = Document

    def __init__(self, *args, **kwargs):
        """Pick the element/comment wrapper classes, then run the base init.

        Elements come from the builder module generated for ``lxml.html``,
        comments from the one generated for ``lxml.etree``.  Both are set
        before the base initialiser is called.
        """
        html_builder = etree_builders.getETreeModule(html, fullTree=False)
        etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
        self.elementClass = html_builder.Element
        self.commentClass = etree_builder.Comment
        _base.TreeBuilder.__init__(self, *args, **kwargs)

    def reset(self):
        """Reset the base builder and clear this builder's own state."""
        _base.TreeBuilder.reset(self)
        self.rootInserted = False
        self.initialComments = []
        self.doctype = None

    def getDocument(self):
        """Return the lxml element tree stored on the current document."""
        return self.document._elementTree

    def getFragment(self):
        """Return the content of the first open element as a flat list.

        The list holds the element's leading text (if non-empty), then its
        child elements in order, then its tail text (if non-empty).
        """
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        # list(element) replaces the deprecated element.getchildren().
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, name, publicId, systemId):
        """Remember the doctype; it is serialised later by ``insertRoot``."""
        self.doctype = self.doctypeClass(name, publicId, systemId)

    def insertComment(self, data, parent=None):
        """Insert a comment, buffering it if no root element exists yet.

        Comments seen before the root is inserted are collected in
        ``initialComments`` and attached by ``insertRoot``; later comments
        are handled by the base class.
        """
        if not self.rootInserted:
            self.initialComments.append(data)
        else:
            _base.TreeBuilder.insertComment(self, data, parent)

    def _doctypeDeclaration(self):
        """Return the ``<!DOCTYPE ...>`` markup for the stored doctype.

        Returns an empty string when there is no doctype or it has no name.
        Identifiers that are ``None`` are left out instead of being
        formatted as the literal text ``None``.
        """
        doctype = self.doctype
        if not (doctype and doctype.name):
            return ''

        parts = ['<!DOCTYPE %s' % doctype.name]
        publicId = doctype.publicId
        systemId = doctype.systemId
        if publicId is not None:
            parts.append(' PUBLIC "%s"' % publicId)
            if systemId is not None:
                parts.append(' "%s"' % systemId)
        elif systemId is not None:
            # No public identifier: the SYSTEM form carries the system id alone.
            parts.append(' SYSTEM "%s"' % systemId)
        parts.append('>')
        return ''.join(parts)

    def insertRoot(self, name):
        """Create the document and its root element.

        A minimal ``<html></html>`` document, preceded by the stored doctype
        if any, is parsed with ``lxml.html.fromstring``.  Buffered initial
        comments are added before the resulting root, and the root is
        registered in ``document.childNodes`` and ``openElements``.
        """
        root = html.fromstring(self._doctypeDeclaration() + '<html></html>')

        # Append the initial comments:
        for comment in self.initialComments:
            root.addprevious(etree.Comment(comment))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name)
        root_element._element = root
        self.document.childNodes.append(root_element)
        self.openElements.append(root_element)

        self.rootInserted = True