Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/lxml/isoschematron/__init__.py
811 views
1
"""The ``lxml.isoschematron`` package implements ISO Schematron support on top
2
of the pure-xslt 'skeleton' implementation.
3
"""
4
5
import sys
6
import os.path
7
from lxml import etree as _etree # due to validator __init__ signature
8
9
10
# Python 2 / Python 3 text-type aliases (same approach as lxml.html).
# Each name is probed on its own; a NameError means the Python 2 builtin
# is gone and ``str`` takes its place.
try:
    basestring
except NameError:
    # Python 3: there is no common string base class any more
    basestring = str
try:
    unicode
except NameError:
    # Python 3: ``str`` is the text type
    unicode = str
21
22
23
# Public names of this package, one per line.
__all__ = [
    'extract_xsd',
    'extract_rng',
    'iso_dsdl_include',
    'iso_abstract_expand',
    'iso_svrl_for_xslt1',
    'svrl_validation_errors',
    'schematron_schema_valid',
    'stylesheet_params',
    'Schematron',
]
27
28
29
# Namespace URIs of the schema languages this package deals with.
#FIXME: Maybe lxml should provide a dedicated place for common namespace
#FIXME: definitions?
XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
SVRL_NS = "http://purl.oclc.org/dsdl/svrl"


# Helpers: fully qualified ("{namespace}localname") root tags used to
# recognise the kind of schema document, and the directory holding the
# bundled stylesheets and grammars (next to this module).
_schematron_root = '{' + SCHEMATRON_NS + '}schema'
_xml_schema_root = '{' + XML_SCHEMA_NS + '}schema'
_resources_dir = os.path.join(os.path.split(__file__)[0], 'resources')
42
43
44
def _load_xslt(*relative_path):
    """Parse a stylesheet below ``<resources>/xsl`` and compile it.

    ``relative_path`` holds the path components of the stylesheet file,
    relative to the bundled ``xsl`` directory.
    """
    stylesheet_file = os.path.join(_resources_dir, 'xsl', *relative_path)
    return _etree.XSLT(_etree.parse(stylesheet_file))


# The transformation steps of the iso-schematron skeleton implementation,
# each one a ready-to-call XSLT object.
extract_xsd = _load_xslt('XSD2Schtrn.xsl')
extract_rng = _load_xslt('RNG2Schtrn.xsl')
iso_dsdl_include = _load_xslt('iso-schematron-xslt1', 'iso_dsdl_include.xsl')
iso_abstract_expand = _load_xslt(
    'iso-schematron-xslt1', 'iso_abstract_expand.xsl')
iso_svrl_for_xslt1 = _load_xslt(
    'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')


# Accessor for the SVRL result report: selects every failed assertion.
svrl_validation_errors = _etree.XPath(
    '//svrl:failed-assert', namespaces=dict(svrl=SVRL_NS))


# RelaxNG grammar used to check that a schematron schema is itself valid.
_schematron_rng_file = os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')
schematron_schema_valid = _etree.RelaxNG(file=_schematron_rng_file)
68
69
70
def stylesheet_params(**kwargs):
    """Build a dictionary of XSLT stylesheet parameters from keyword args.

    Stylesheet parameters have to be XPath expressions, for example:

    * string expressions, like "'5'"
    * simple (number) expressions, like "5"
    * valid XPath expressions, like "/a/b/text()"

    Each keyword argument is translated as follows:

    * ``None`` is rejected with a TypeError.
    * A string is wrapped with ``XSLT.strparam()``.
    * An ``XPath`` object is stored as it is.
    * Anything else is converted to its text representation.

    Returns a new dict mapping the parameter names to the converted values.
    """
    converted = {}
    for name in kwargs:
        value = kwargs[name]
        if value is None:
            raise TypeError('None not allowed as a stylesheet parameter')
        if isinstance(value, basestring):
            converted[name] = _etree.XSLT.strparam(value)
        elif isinstance(value, _etree.XPath):
            converted[name] = value
        else:
            converted[name] = unicode(value)
    return converted
95
96
97
# helper function for use in Schematron __init__
98
def _stylesheet_param_dict(paramsDict, kwargsDict):
    """Merge two parameter mappings and wrap them as stylesheet arguments.

    The result starts out as a copy of ``paramsDict``; every entry of
    ``kwargsDict`` whose value is not None then overrides or extends it.
    The merged mapping is finally passed through ``stylesheet_params()``.
    Neither input mapping is modified.
    """
    # work on a copy: paramsDict may be a shared (mutable default) dict
    merged = dict(paramsDict)
    merged.update(
        (name, value) for name, value in kwargsDict.items()
        if value is not None)  # None means "not given", so no override
    return stylesheet_params(**merged)
110
111
112
class Schematron(_etree._Validator):
    """An ISO Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.

    Schematron is a less well known, but very powerful schema language.
    The main idea is to use the capabilities of XPath to put restrictions on
    the structure and the content of XML documents.

    The standard behaviour is to fail on ``failed-assert`` findings only
    (``ASSERTS_ONLY``).  To change this, you can either pass a report filter
    function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
    or a custom ``XPath`` object), or subclass isoschematron.Schematron for
    complete control of the validation process.

    Built on the Schematron language 'reference' skeleton pure-xslt
    implementation, the validator is created as an XSLT 1.0 stylesheet using
    these steps:

     0) (Extract from XML Schema or RelaxNG schema)
     1) Process inclusions
     2) Process abstract patterns
     3) Compile the schematron schema to XSLT

    The ``include`` and ``expand`` keyword arguments can be used to switch off
    steps 1) and 2).
    To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
    keyword arguments ``include_params``, ``expand_params`` or
    ``compile_params``.
    For convenience, the compile-step parameter ``phase`` is also exposed as a
    keyword argument ``phase``. This takes precedence if the parameter is also
    given in the parameter dictionary.

    If ``store_schematron`` is set to True, the (included-and-expanded)
    schematron document tree is stored and available through the ``schematron``
    property.
    If ``store_xslt`` is set to True, the validation XSLT document tree will be
    stored and can be retrieved through the ``validator_xslt`` property.
    With ``store_report`` set to True (default: False), the resulting validation
    report document gets stored and can be accessed as the ``validation_report``
    property.

    Here is a usage example::

      >>> from lxml import etree
      >>> from lxml.isoschematron import Schematron

      >>> schematron = Schematron(etree.XML('''
      ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
      ...   <pattern id="id_only_attribute">
      ...     <title>id is the only permitted attribute name</title>
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>'''),
      ... error_finder=Schematron.ASSERTS_AND_REPORTS)

      >>> xml = etree.XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      False

      >>> xml = etree.XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      True
    """

    # libxml2 error categorization for validation errors
    _domain = _etree.ErrorDomains.SCHEMATRONV
    _level = _etree.ErrorLevels.ERROR
    _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT

    # convenience definitions for common behaviours
    ASSERTS_ONLY = svrl_validation_errors  # Default
    ASSERTS_AND_REPORTS = _etree.XPath(
        '//svrl:failed-assert | //svrl:successful-report',
        namespaces={'svrl': SVRL_NS})

    def _extract(self, element):
        """Extract embedded schematron schema from non-schematron host schema.

        This method will only be called by __init__ if the given schema document
        is not a schematron schema by itself.
        Must return a schematron schema document tree or None.
        """
        schematron = None
        if element.tag == _xml_schema_root:
            schematron = self._extract_xsd(element)
        # Use .get(): an element without any namespace has prefix None and,
        # unless a default namespace is declared, no matching nsmap entry.
        # A plain subscript would then raise KeyError instead of letting
        # this method return None as documented.
        elif element.nsmap.get(element.prefix) == RELAXNG_NS:
            # RelaxNG does not have a single unique root element
            schematron = self._extract_rng(element)
        return schematron

    # customization points
    # etree.XSLT objects that provide the extract, include, expand, compile
    # steps
    _extract_xsd = extract_xsd
    _extract_rng = extract_rng
    _include = iso_dsdl_include
    _expand = iso_abstract_expand
    _compile = iso_svrl_for_xslt1

    # etree.xpath object that determines input document validity when applied to
    # the svrl result report; must return a list of result elements (empty if
    # valid)
    _validation_errors = ASSERTS_ONLY

    # NOTE: the ``{}`` defaults below are shared between calls.  They are only
    # unpacked or copied in __init__, never modified.
    def __init__(self, etree=None, file=None, include=True, expand=True,
                 include_params={}, expand_params={}, compile_params={},
                 store_schematron=False, store_xslt=False, store_report=False,
                 phase=None, error_finder=ASSERTS_ONLY):
        """Build the validating XSLT from a schema tree or schema file.

        ``etree`` is a root Element or an ElementTree holding the schema;
        if it is None, ``file`` is parsed instead.

        ``include`` / ``expand`` switch the inclusion and abstract pattern
        expansion steps on or off.  ``include_params`` and ``expand_params``
        are passed to these steps as keyword arguments as they are, whereas
        ``compile_params`` (together with ``phase``, if it is not None) are
        first wrapped as stylesheet parameters.

        ``store_schematron``, ``store_xslt`` and ``store_report`` decide
        whether the processed schema, the generated XSLT document and the
        report of the most recent validation are kept on the instance.

        ``error_finder`` is applied to the validation report and must return
        the list of findings that make a document invalid.  If it is left at
        ``ASSERTS_ONLY``, the class level ``_validation_errors`` stays in
        effect.

        Raises ``SchematronParseError`` if the schema cannot be loaded, is
        neither a schematron schema nor schematron-extractable, or does not
        validate against the ISO Schematron grammar.  Raises ``ValueError``
        if no root element could be determined.
        """
        super(Schematron, self).__init__()

        self._store_report = store_report
        self._schematron = None
        self._validator_xslt = None
        self._validation_report = None
        if error_finder is not self.ASSERTS_ONLY:
            self._validation_errors = error_finder

        # parse schema document, may be a schematron schema or an XML Schema or
        # a RelaxNG schema with embedded schematron rules
        root = None
        try:
            if etree is not None:
                if _etree.iselement(etree):
                    root = etree
                else:
                    root = etree.getroot()
            elif file is not None:
                root = _etree.parse(file).getroot()
        except Exception:
            raise _etree.SchematronParseError(
                "No tree or file given: %s" % sys.exc_info()[1])
        if root is None:
            raise ValueError("Empty tree")
        if root.tag == _schematron_root:
            schematron = root
        else:
            schematron = self._extract(root)
        if schematron is None:
            raise _etree.SchematronParseError(
                "Document is not a schematron schema or schematron-extractable")
        # perform the iso-schematron skeleton implementation steps to get a
        # validating xslt
        if include:
            schematron = self._include(schematron, **include_params)
        if expand:
            schematron = self._expand(schematron, **expand_params)
        if not schematron_schema_valid(schematron):
            raise _etree.SchematronParseError(
                "invalid schematron schema: %s" %
                schematron_schema_valid.error_log)
        if store_schematron:
            self._schematron = schematron
        # add new compile keyword args here if exposing them
        compile_kwargs = {'phase': phase}
        compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
        validator_xslt = self._compile(schematron, **compile_params)
        if store_xslt:
            self._validator_xslt = validator_xslt
        self._validator = _etree.XSLT(validator_xslt)

    def __call__(self, etree):
        """Validate doc using Schematron.

        Returns true if document is valid, false if not.
        Every finding selected by the error finder is appended to the error
        log, using the document URL (or '<file>') as file name.
        """
        self._clear_error_log()
        result = self._validator(etree)
        if self._store_report:
            self._validation_report = result
        errors = self._validation_errors(result)
        if errors:
            if _etree.iselement(etree):
                fname = etree.getroottree().docinfo.URL or '<file>'
            else:
                fname = etree.docinfo.URL or '<file>'
            for error in errors:
                # Does svrl report the line number, anywhere? Don't think so.
                self._append_log_message(
                    domain=self._domain, type=self._error_type,
                    level=self._level, line=0,
                    message=_etree.tostring(error, encoding='unicode'),
                    filename=fname)
            return False
        return True

    @property
    def schematron(self):
        """ISO-schematron schema document (None if object has been initialized
        with store_schematron=False).
        """
        return self._schematron

    @property
    def validator_xslt(self):
        """ISO-schematron skeleton implementation XSLT validator document (None
        if object has been initialized with store_xslt=False).
        """
        return self._validator_xslt

    @property
    def validation_report(self):
        """ISO-schematron validation result report (None if result-storing has
        been turned off).
        """
        return self._validation_report
335
336