Path: blob/master/venv/Lib/site-packages/lxml/isoschematron/__init__.py
811 views
"""The ``lxml.isoschematron`` package implements ISO Schematron support on top
of the pure-xslt 'skeleton' implementation.
"""

import sys
import os.path
from lxml import etree as _etree # due to validator __init__ signature


# some compat stuff, borrowed from lxml.html
try:
    unicode
except NameError:
    # Python 3
    unicode = str
try:
    basestring
except NameError:
    # Python 3
    basestring = str


__all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include',
           'iso_abstract_expand', 'iso_svrl_for_xslt1',
           'svrl_validation_errors', 'schematron_schema_valid',
           'stylesheet_params', 'Schematron']


# some namespaces
#FIXME: Maybe lxml should provide a dedicated place for common namespace
#FIXME: definitions?
XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
SVRL_NS = "http://purl.oclc.org/dsdl/svrl"


# some helpers
_schematron_root = '{%s}schema' % SCHEMATRON_NS
_xml_schema_root = '{%s}schema' % XML_SCHEMA_NS
_resources_dir = os.path.join(os.path.dirname(__file__), 'resources')


# the iso-schematron skeleton implementation steps aka xsl transformations
# (parsed and compiled once, at import time, from the packaged resources)
extract_xsd = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl')))
extract_rng = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl')))
iso_dsdl_include = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
                 'iso_dsdl_include.xsl')))
iso_abstract_expand = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
                 'iso_abstract_expand.xsl')))
iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir,
                 'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')))


# svrl result accessors
svrl_validation_errors = _etree.XPath(
    '//svrl:failed-assert', namespaces={'svrl': SVRL_NS})


# RelaxNG validator for schematron schemas
schematron_schema_valid = _etree.RelaxNG(
    file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))


def stylesheet_params(**kwargs):
    """Convert keyword args to a dictionary of stylesheet parameters.

    XSL stylesheet parameters must be XPath expressions, i.e.:

    * string expressions, like "'5'"
    * simple (number) expressions, like "5"
    * valid XPath expressions, like "/a/b/text()"

    This function converts native Python keyword arguments to stylesheet
    parameters following these rules:

    * If an arg is a string, wrap it with XSLT.strparam().
    * If an arg is an XPath object, pass it through unchanged.
    * If an arg is None, raise TypeError.
    * Else convert the arg to a (unicode) string.

    Returns a new dict mapping each keyword name to its converted value.
    """
    result = {}
    for key, val in kwargs.items():
        if isinstance(val, basestring):
            val = _etree.XSLT.strparam(val)
        elif val is None:
            raise TypeError('None not allowed as a stylesheet parameter')
        elif not isinstance(val, _etree.XPath):
            val = unicode(val)
        result[key] = val
    return result


# helper function for use in Schematron __init__
def _stylesheet_param_dict(paramsDict, kwargsDict):
    """Return a copy of paramsDict, updated with kwargsDict entries, wrapped as
    stylesheet arguments.

    kwargsDict entries with a value of None are ignored; all other entries
    override same-named entries of paramsDict.  The merged mapping is then
    converted with stylesheet_params(), so a None value that is present in
    paramsDict itself still raises TypeError.
    """
    # beware of changing mutable default arg: always work on a copy
    paramsDict = dict(paramsDict)
    for k, v in kwargsDict.items():
        if v is not None:  # None values do not override
            paramsDict[k] = v
    paramsDict = stylesheet_params(**paramsDict)
    return paramsDict


class Schematron(_etree._Validator):
    """An ISO Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.

    Schematron is a less well known, but very powerful schema language.
    The main idea is to use the capabilities of XPath to put restrictions on
    the structure and the content of XML documents.

    The standard behaviour is to fail on ``failed-assert`` findings only
    (``ASSERTS_ONLY``).  To change this, you can either pass a report filter
    function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
    or a custom ``XPath`` object), or subclass isoschematron.Schematron for
    complete control of the validation process.

    Built on the Schematron language 'reference' skeleton pure-xslt
    implementation, the validator is created as an XSLT 1.0 stylesheet using
    these steps:

    0) (Extract from XML Schema or RelaxNG schema)
    1) Process inclusions
    2) Process abstract patterns
    3) Compile the schematron schema to XSLT

    The ``include`` and ``expand`` keyword arguments can be used to switch off
    steps 1) and 2).
    To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
    keyword arguments ``include_params``, ``expand_params`` or
    ``compile_params``.
    For convenience, the compile-step parameter ``phase`` is also exposed as a
    keyword argument ``phase``. This takes precedence if the parameter is also
    given in the parameter dictionary.

    If ``store_schematron`` is set to True, the (included-and-expanded)
    schematron document tree is stored and available through the ``schematron``
    property.
    If ``store_xslt`` is set to True, the validation XSLT document tree will be
    stored and can be retrieved through the ``validator_xslt`` property.
    With ``store_report`` set to True (default: False), the resulting validation
    report document gets stored and can be accessed as the ``validation_report``
    property.

    Here is a usage example::

      >>> from lxml import etree
      >>> from lxml.isoschematron import Schematron

      >>> schematron = Schematron(etree.XML('''
      ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
      ...   <pattern id="id_only_attribute">
      ...     <title>id is the only permitted attribute name</title>
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>'''),
      ... error_finder=Schematron.ASSERTS_AND_REPORTS)

      >>> xml = etree.XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      False

      >>> xml = etree.XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      True
    """

    # libxml2 error categorization for validation errors
    _domain = _etree.ErrorDomains.SCHEMATRONV
    _level = _etree.ErrorLevels.ERROR
    _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT

    # convenience definitions for common behaviours
    ASSERTS_ONLY = svrl_validation_errors  # Default
    ASSERTS_AND_REPORTS = _etree.XPath(
        '//svrl:failed-assert | //svrl:successful-report',
        namespaces={'svrl': SVRL_NS})

    def _extract(self, element):
        """Extract embedded schematron schema from non-schematron host schema.

        This method will only be called by __init__ if the given schema document
        is not a schematron schema by itself.
        Must return a schematron schema document tree or None.
        """
        schematron = None
        if element.tag == _xml_schema_root:
            schematron = self._extract_xsd(element)
        # Use .get(): a root element without any namespace has prefix None and
        # no default-namespace entry in nsmap, so plain indexing would raise
        # KeyError here instead of letting __init__ report a
        # SchematronParseError for a non-extractable document.
        elif element.nsmap.get(element.prefix) == RELAXNG_NS:
            # RelaxNG does not have a single unique root element
            schematron = self._extract_rng(element)
        return schematron

    # customization points
    # etree.XSLT objects that provide the extract, include, expand, compile
    # steps
    _extract_xsd = extract_xsd
    _extract_rng = extract_rng
    _include = iso_dsdl_include
    _expand = iso_abstract_expand
    _compile = iso_svrl_for_xslt1

    # etree.xpath object that determines input document validity when applied to
    # the svrl result report; must return a list of result elements (empty if
    # valid)
    _validation_errors = ASSERTS_ONLY

    # NOTE: the ``{}`` defaults below are only ever read (unpacked with ``**``
    # or copied by _stylesheet_param_dict), never mutated, so sharing them
    # between calls is harmless; they are kept to leave the signature unchanged.
    def __init__(self, etree=None, file=None, include=True, expand=True,
                 include_params={}, expand_params={}, compile_params={},
                 store_schematron=False, store_xslt=False, store_report=False,
                 phase=None, error_finder=ASSERTS_ONLY):
        """Build the validating XSLT from a schematron (or host) schema.

        The schema is taken from ``etree`` (an Element or ElementTree) if
        given, otherwise parsed from ``file``.  See the class docstring for
        the meaning of the remaining keyword arguments.

        Raises SchematronParseError if obtaining the root element fails, if
        the document is neither a schematron schema nor one that a schematron
        schema can be extracted from, or if the resulting schema does not
        validate against the schematron RelaxNG schema.
        Raises ValueError if no root element could be determined.
        """
        super(Schematron, self).__init__()

        self._store_report = store_report
        self._schematron = None
        self._validator_xslt = None
        self._validation_report = None
        # only shadow the class-level default when a custom finder is given
        if error_finder is not self.ASSERTS_ONLY:
            self._validation_errors = error_finder

        # parse schema document, may be a schematron schema or an XML Schema or
        # a RelaxNG schema with embedded schematron rules
        root = None
        try:
            if etree is not None:
                if _etree.iselement(etree):
                    root = etree
                else:
                    root = etree.getroot()
            elif file is not None:
                root = _etree.parse(file).getroot()
        except Exception:
            raise _etree.SchematronParseError(
                "No tree or file given: %s" % sys.exc_info()[1])
        if root is None:
            raise ValueError("Empty tree")
        if root.tag == _schematron_root:
            schematron = root
        else:
            schematron = self._extract(root)
        if schematron is None:
            raise _etree.SchematronParseError(
                "Document is not a schematron schema or schematron-extractable")
        # perform the iso-schematron skeleton implementation steps to get a
        # validating xslt
        if include:
            schematron = self._include(schematron, **include_params)
        if expand:
            schematron = self._expand(schematron, **expand_params)
        if not schematron_schema_valid(schematron):
            raise _etree.SchematronParseError(
                "invalid schematron schema: %s" %
                schematron_schema_valid.error_log)
        if store_schematron:
            self._schematron = schematron
        # add new compile keyword args here if exposing them
        compile_kwargs = {'phase': phase}
        compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
        validator_xslt = self._compile(schematron, **compile_params)
        if store_xslt:
            self._validator_xslt = validator_xslt
        self._validator = _etree.XSLT(validator_xslt)

    def __call__(self, etree):
        """Validate doc using Schematron.

        Applies the compiled validator XSLT to ``etree`` (an Element or an
        ElementTree) and runs the configured error finder on the result.
        Every finding is appended to the error log, serialised as text.

        Returns true if document is valid, false if not.
        """
        self._clear_error_log()
        result = self._validator(etree)
        if self._store_report:
            self._validation_report = result
        errors = self._validation_errors(result)
        if errors:
            # an Element has no docinfo of its own; go through its tree
            if _etree.iselement(etree):
                fname = etree.getroottree().docinfo.URL or '<file>'
            else:
                fname = etree.docinfo.URL or '<file>'
            for error in errors:
                # Does svrl report the line number, anywhere? Don't think so.
                self._append_log_message(
                    domain=self._domain, type=self._error_type,
                    level=self._level, line=0,
                    message=_etree.tostring(error, encoding='unicode'),
                    filename=fname)
            return False
        return True

    @property
    def schematron(self):
        """ISO-schematron schema document (None if object has been initialized
        with store_schematron=False).
        """
        return self._schematron

    @property
    def validator_xslt(self):
        """ISO-schematron skeleton implementation XSLT validator document (None
        if object has been initialized with store_xslt=False).
        """
        return self._validator_xslt

    @property
    def validation_report(self):
        """ISO-schematron validation result report (None if result-storing has
        been turned off).
        """
        return self._validation_report