# Path: blob/master/venv/Lib/site-packages/lxml/cssselect.py
"""CSS Selectors based on XPath.

This module supports selecting XML/HTML tags based on CSS selectors.
See the `CSSSelector` class for details.

This is a thin wrapper around cssselect 0.7 or later.
"""

from __future__ import absolute_import

from . import etree
try:
    import cssselect as external_cssselect
except ImportError:
    # Re-raise with a hint on how to resolve the missing dependency.
    raise ImportError(
        'cssselect does not seem to be installed. '
        'See http://packages.python.org/cssselect/')


# Re-export the cssselect exception types under this module's namespace.
SelectorSyntaxError = external_cssselect.SelectorSyntaxError
ExpressionError = external_cssselect.ExpressionError
SelectorError = external_cssselect.SelectorError


__all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError',
           'CSSSelector']


class LxmlTranslator(external_cssselect.GenericTranslator):
    """
    A custom CSS selector to XPath translator with lxml-specific extensions.
    """
    def xpath_contains_function(self, xpath, function):
        """Translate the ``:contains()`` pseudo-class into an XPath condition.

        The generated condition lower-cases the string value of the node
        (through the ``__lxml_internal_css:lower-case`` XPath function) and
        checks that it contains the lower-cased argument.

        Raises ``ExpressionError`` unless the argument types are exactly one
        ``STRING`` or exactly one ``IDENT``.
        """
        # Defined there, removed in later drafts:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        if function.argument_types() not in (['STRING'], ['IDENT']):
            raise ExpressionError(
                "Expected a single string or ident for :contains(), got %r"
                % function.arguments)
        value = function.arguments[0].value
        # Both sides are lower-cased: the needle here in Python, the node
        # text inside the XPath expression via the registered function.
        return xpath.add_condition(
            'contains(__lxml_internal_css:lower-case(string(.)), %s)'
            % self.xpath_literal(value.lower()))


class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
    """
    lxml extensions + HTML support.
    """


def _make_lower_case(context, s):
    """Return ``s`` lower-cased; ``context`` is accepted but not used."""
    return s.lower()

# Import-time side effect: register ``_make_lower_case`` as the
# ``lower-case`` function in a dedicated namespace, bound to the
# ``__lxml_internal_css`` prefix that ``LxmlTranslator`` emits.
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
ns.prefix = '__lxml_internal_css'
ns['lower-case'] = _make_lower_case


class CSSSelector(etree.XPath):
    """A CSS selector.

    Usage::

        >>> from lxml import etree, cssselect
        >>> select = cssselect.CSSSelector("a tag > child")

        >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
        >>> [ el.tag for el in select(root) ]
        ['child']

    To use CSS namespaces, you need to pass a prefix-to-namespace
    mapping as ``namespaces`` keyword argument::

        >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
        ...                                   namespaces={'rdf': rdfns})

        >>> rdf = etree.XML((
        ...     '<root xmlns:rdf="%s">'
        ...       '<rdf:Description>blah</rdf:Description>'
        ...     '</root>') % rdfns)
        >>> [(el.tag, el.text) for el in select_ns(rdf)]
        [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]

    """
    def __init__(self, css, namespaces=None, translator='xml'):
        """Compile the CSS expression ``css`` into an XPath evaluator.

        ``translator`` is either one of the names ``'xml'``, ``'html'`` or
        ``'xhtml'``, or any other object providing ``css_to_xpath(css)``;
        values other than the three names are used as-is.  ``namespaces``
        is forwarded to ``etree.XPath``.  The original selector text is
        kept in ``self.css``.
        """
        if translator == 'xml':
            translator = LxmlTranslator()
        elif translator == 'html':
            translator = LxmlHTMLTranslator()
        elif translator == 'xhtml':
            translator = LxmlHTMLTranslator(xhtml=True)
        path = translator.css_to_xpath(css)
        etree.XPath.__init__(self, path, namespaces=namespaces)
        self.css = css

    def __repr__(self):
        """Return ``<ClassName hexid for 'css'>``."""
        return '<%s %s for %r>' % (
            self.__class__.__name__,
            hex(abs(id(self)))[2:],  # [2:] drops the leading '0x'
            self.css)