Path: blob/master/venv/Lib/site-packages/lxml/ElementInclude.py
811 views
#
# ElementTree
# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
#
# limited xinclude support for element trees
#
# history:
# 2003-08-15 fl created
# 2003-11-14 fl fixed default loader
#
# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
#
# [email protected]
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2004 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------

"""
Limited XInclude support for the ElementTree package.

While lxml.etree has full support for XInclude (see
`etree.ElementTree.xinclude()`), this module provides a simpler, pure
Python, ElementTree compatible implementation that supports a simple
form of custom URL resolvers.
"""

from lxml import etree
try:
    from urlparse import urljoin
    from urllib2 import urlopen
except ImportError:
    # Python 3
    from urllib.parse import urljoin
    from urllib.request import urlopen

# Clark-notation prefix of the XInclude namespace.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

XINCLUDE_INCLUDE = XINCLUDE + "include"
XINCLUDE_FALLBACK = XINCLUDE + "fallback"
# Wildcard tag matching every element in the XInclude namespace.
XINCLUDE_ITER_TAG = XINCLUDE + "*"

# For security reasons, the inclusion depth is limited to this read-only value by default.
DEFAULT_MAX_INCLUSION_DEPTH = 6


##
# Fatal include error.

class FatalIncludeError(etree.LxmlSyntaxError):
    """Raised when an XInclude directive is malformed or cannot be resolved."""


class LimitedRecursiveIncludeError(FatalIncludeError):
    """Raised when the maximum xinclude depth is reached while including."""


##
# ET compatible default loader.
# This loader reads an included resource from disk.
#
# @param href Resource reference.
# @param parse Parse mode. Either "xml" or "text".
# @param encoding Optional text encoding.
# @return The expanded resource. If the parse mode is "xml", this
#     is an ElementTree instance. If the parse mode is "text", this
#     is a Unicode string.
If the loader fails, it can return None92# or raise an IOError exception.93# @throws IOError If the loader fails to load the resource.9495def default_loader(href, parse, encoding=None):96file = open(href, 'rb')97if parse == "xml":98data = etree.parse(file).getroot()99else:100data = file.read()101if not encoding:102encoding = 'utf-8'103data = data.decode(encoding)104file.close()105return data106107108##109# Default loader used by lxml.etree - handles custom resolvers properly110#111112def _lxml_default_loader(href, parse, encoding=None, parser=None):113if parse == "xml":114data = etree.parse(href, parser).getroot()115else:116if "://" in href:117f = urlopen(href)118else:119f = open(href, 'rb')120data = f.read()121f.close()122if not encoding:123encoding = 'utf-8'124data = data.decode(encoding)125return data126127128##129# Wrapper for ET compatibility - drops the parser130131def _wrap_et_loader(loader):132def load(href, parse, encoding=None, parser=None):133return loader(href, parse, encoding)134return load135136137##138# Expand XInclude directives.139#140# @param elem Root element.141# @param loader Optional resource loader. If omitted, it defaults142# to {@link default_loader}. 
#     If given, it should be a callable
#     that implements the same interface as <b>default_loader</b>.
# @param base_url The base URL of the original file, to resolve
#     relative include file references.
# @param max_depth The maximum number of recursive inclusions.
#     Limited to reduce the risk of malicious content explosion.
#     Pass None to disable the limitation.
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements.
# @throws IOError If the function fails to load a given resource.
# @returns the node or its replacement if it was an XInclude node

def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand the XInclude directives found below ``elem``.

    ``elem`` may be an element or an object with a ``getroot()`` method
    (an element tree).  When ``base_url`` is not given it is taken from the
    tree's ``docinfo.URL`` if the tree has a ``docinfo`` attribute.

    Returns the processed root element, or its replacement when the root
    itself was an ``xi:include`` element.

    Raises ``ValueError`` for a negative ``max_depth``.
    """
    if max_depth is None:
        # -1 never counts down to the "== 0" limit check in _include().
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    if base_url is None:
        if hasattr(elem, 'getroot'):
            tree = elem
            elem = elem.getroot()
        else:
            tree = elem.getroottree()
        if hasattr(tree, 'docinfo'):
            base_url = tree.docinfo.URL
    elif hasattr(elem, 'getroot'):
        elem = elem.getroot()
    # Hand the result back as documented; it used to be discarded, which
    # made a replaced root node unreachable for the caller.
    return _include(elem, loader, base_url, max_depth)


def _include(elem, loader=None, base_url=None,
             max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None):
    """Recursively expand XInclude directives in the subtree of ``elem``.

    ``loader`` is an optional ET-style loader; without one,
    ``_lxml_default_loader`` is used.  ``base_url`` is joined with each
    ``href`` attribute.  ``max_depth`` is decremented on every nested XML
    inclusion and raises ``LimitedRecursiveIncludeError`` when it is 0.
    ``_parent_hrefs`` holds the hrefs currently being included and is used
    to detect recursive inclusion.

    Returns ``elem``, or the included node/text when the root element
    itself was an ``xi:include`` directive.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Materialise the matches first: the tree is modified while looping.
    include_elements = list(
        elem.iter(XINCLUDE_ITER_TAG))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                    )
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                    )
                node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node  # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                    )
                if parent is None:
                    # Checked before looking at siblings: a root element has
                    # no parent to remove it from.
                    return text  # replaced the root node!
                # Removing an element from an lxml tree also removes its
                # tail, so the tail must be carried over in both branches.
                text += e.tail or ""
                predecessor = e.getprevious()
                if predecessor is not None:
                    predecessor.tail = (predecessor.tail or "") + text
                else:
                    parent.text = (parent.text or "") + text
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
            )
    return elem