# Path: blob/master/venv/Lib/site-packages/lxml/sax.py
# 811 views
# cython: language_level=212"""3SAX-based adapter to copy trees from/to the Python standard library.45Use the `ElementTreeContentHandler` class to build an ElementTree from6SAX events.78Use the `ElementTreeProducer` class or the `saxify()` function to fire9the SAX events of an ElementTree against a SAX ContentHandler.1011See http://codespeak.net/lxml/sax.html12"""1314from __future__ import absolute_import1516from xml.sax.handler import ContentHandler17from lxml import etree18from lxml.etree import ElementTree, SubElement19from lxml.etree import Comment, ProcessingInstruction202122class SaxError(etree.LxmlError):23"""General SAX error.24"""252627def _getNsTag(tag):28if tag[0] == '{':29return tuple(tag[1:].split('}', 1))30else:31return None, tag323334class ElementTreeContentHandler(ContentHandler):35"""Build an lxml ElementTree from SAX events.36"""37def __init__(self, makeelement=None):38ContentHandler.__init__(self)39self._root = None40self._root_siblings = []41self._element_stack = []42self._default_ns = None43self._ns_mapping = { None : [None] }44self._new_mappings = {}45if makeelement is None:46makeelement = etree.Element47self._makeelement = makeelement4849def _get_etree(self):50"Contains the generated ElementTree after parsing is finished."51return ElementTree(self._root)5253etree = property(_get_etree, doc=_get_etree.__doc__)5455def setDocumentLocator(self, locator):56pass5758def startDocument(self):59pass6061def endDocument(self):62pass6364def startPrefixMapping(self, prefix, uri):65self._new_mappings[prefix] = uri66try:67self._ns_mapping[prefix].append(uri)68except KeyError:69self._ns_mapping[prefix] = [uri]70if prefix is None:71self._default_ns = uri7273def endPrefixMapping(self, prefix):74ns_uri_list = self._ns_mapping[prefix]75ns_uri_list.pop()76if prefix is None:77self._default_ns = ns_uri_list[-1]7879def _buildTag(self, ns_name_tuple):80ns_uri, local_name = ns_name_tuple81if ns_uri:82el_tag = "{%s}%s" % ns_name_tuple83elif self._default_ns:84el_tag = 
"{%s}%s" % (self._default_ns, local_name)85else:86el_tag = local_name87return el_tag8889def startElementNS(self, ns_name, qname, attributes=None):90el_name = self._buildTag(ns_name)91if attributes:92attrs = {}93try:94iter_attributes = attributes.iteritems()95except AttributeError:96iter_attributes = attributes.items()9798for name_tuple, value in iter_attributes:99if name_tuple[0]:100attr_name = "{%s}%s" % name_tuple101else:102attr_name = name_tuple[1]103attrs[attr_name] = value104else:105attrs = None106107element_stack = self._element_stack108if self._root is None:109element = self._root = \110self._makeelement(el_name, attrs, self._new_mappings)111if self._root_siblings and hasattr(element, 'addprevious'):112for sibling in self._root_siblings:113element.addprevious(sibling)114del self._root_siblings[:]115else:116element = SubElement(element_stack[-1], el_name,117attrs, self._new_mappings)118element_stack.append(element)119120self._new_mappings.clear()121122def processingInstruction(self, target, data):123pi = ProcessingInstruction(target, data)124if self._root is None:125self._root_siblings.append(pi)126else:127self._element_stack[-1].append(pi)128129def endElementNS(self, ns_name, qname):130element = self._element_stack.pop()131el_tag = self._buildTag(ns_name)132if el_tag != element.tag:133raise SaxError("Unexpected element closed: " + el_tag)134135def startElement(self, name, attributes=None):136if attributes:137attributes = dict(138[((None, k), v) for k, v in attributes.items()]139)140self.startElementNS((None, name), name, attributes)141142def endElement(self, name):143self.endElementNS((None, name), name)144145def characters(self, data):146last_element = self._element_stack[-1]147try:148# if there already is a child element, we must append to its tail149last_element = last_element[-1]150last_element.tail = (last_element.tail or '') + data151except IndexError:152# otherwise: append to the text153last_element.text = (last_element.text or '') + 
data154155ignorableWhitespace = characters156157158class ElementTreeProducer(object):159"""Produces SAX events for an element and children.160"""161def __init__(self, element_or_tree, content_handler):162try:163element = element_or_tree.getroot()164except AttributeError:165element = element_or_tree166self._element = element167self._content_handler = content_handler168from xml.sax.xmlreader import AttributesNSImpl as attr_class169self._attr_class = attr_class170self._empty_attributes = attr_class({}, {})171172def saxify(self):173self._content_handler.startDocument()174175element = self._element176if hasattr(element, 'getprevious'):177siblings = []178sibling = element.getprevious()179while getattr(sibling, 'tag', None) is ProcessingInstruction:180siblings.append(sibling)181sibling = sibling.getprevious()182for sibling in siblings[::-1]:183self._recursive_saxify(sibling, {})184185self._recursive_saxify(element, {})186187if hasattr(element, 'getnext'):188sibling = element.getnext()189while getattr(sibling, 'tag', None) is ProcessingInstruction:190self._recursive_saxify(sibling, {})191sibling = sibling.getnext()192193self._content_handler.endDocument()194195def _recursive_saxify(self, element, parent_nsmap):196content_handler = self._content_handler197tag = element.tag198if tag is Comment or tag is ProcessingInstruction:199if tag is ProcessingInstruction:200content_handler.processingInstruction(201element.target, element.text)202tail = element.tail203if tail:204content_handler.characters(tail)205return206207element_nsmap = element.nsmap208new_prefixes = []209if element_nsmap != parent_nsmap:210# There have been updates to the namespace211for prefix, ns_uri in element_nsmap.items():212if parent_nsmap.get(prefix) != ns_uri:213new_prefixes.append( (prefix, ns_uri) )214215attribs = element.items()216if attribs:217attr_values = {}218attr_qnames = {}219for attr_ns_name, value in attribs:220attr_ns_tuple = _getNsTag(attr_ns_name)221attr_values[attr_ns_tuple] = 
value222attr_qnames[attr_ns_tuple] = self._build_qname(223attr_ns_tuple[0], attr_ns_tuple[1], element_nsmap,224preferred_prefix=None, is_attribute=True)225sax_attributes = self._attr_class(attr_values, attr_qnames)226else:227sax_attributes = self._empty_attributes228229ns_uri, local_name = _getNsTag(tag)230qname = self._build_qname(231ns_uri, local_name, element_nsmap, element.prefix, is_attribute=False)232233for prefix, uri in new_prefixes:234content_handler.startPrefixMapping(prefix, uri)235content_handler.startElementNS(236(ns_uri, local_name), qname, sax_attributes)237text = element.text238if text:239content_handler.characters(text)240for child in element:241self._recursive_saxify(child, element_nsmap)242content_handler.endElementNS((ns_uri, local_name), qname)243for prefix, uri in new_prefixes:244content_handler.endPrefixMapping(prefix)245tail = element.tail246if tail:247content_handler.characters(tail)248249def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix, is_attribute):250if ns_uri is None:251return local_name252253if not is_attribute and nsmap.get(preferred_prefix) == ns_uri:254prefix = preferred_prefix255else:256# Pick the first matching prefix, in alphabetical order.257candidates = [258pfx for (pfx, uri) in nsmap.items()259if pfx is not None and uri == ns_uri260]261prefix = (262candidates[0] if len(candidates) == 1263else min(candidates) if candidates264else None265)266267if prefix is None:268# Default namespace269return local_name270return prefix + ':' + local_name271272273def saxify(element_or_tree, content_handler):274"""One-shot helper to generate SAX events from an XML tree and fire275them against a SAX ContentHandler.276"""277return ElementTreeProducer(element_or_tree, content_handler).saxify()278279280