# Path: blob/master/venv/Lib/site-packages/soupsieve/css_parser.py
"""CSS selector parser."""
import re
from functools import lru_cache
from . import util
from . import css_match as cm
from . import css_types as ct
from .util import SelectorSyntaxError

# Code point substituted for NULL and otherwise invalid escapes (U+FFFD).
UNICODE_REPLACEMENT_CHAR = 0xFFFD

# Simple pseudo classes that take no parameters
PSEUDO_SIMPLE = {
    ":any-link",
    ":empty",
    ":first-child",
    ":first-of-type",
    ":in-range",
    ":out-of-range",
    ":last-child",
    ":last-of-type",
    ":link",
    ":only-child",
    ":only-of-type",
    ":root",
    ':checked',
    ':default',
    ':disabled',
    ':enabled',
    ':indeterminate',
    ':optional',
    ':placeholder-shown',
    ':read-only',
    ':read-write',
    ':required',
    ':scope',
    ':defined'
}

# Supported, simple pseudo classes that match nothing in the Soup Sieve environment
PSEUDO_SIMPLE_NO_MATCH = {
    ':active',
    ':current',
    ':focus',
    ':focus-visible',
    ':focus-within',
    ':future',
    ':host',
    ':hover',
    ':local-link',
    ':past',
    ':paused',
    ':playing',
    ':target',
    ':target-within',
    ':user-invalid',
    ':visited'
}

# Complex pseudo classes that take selector lists
PSEUDO_COMPLEX = {
    ':contains',
    ':has',
    ':is',
    ':matches',
    ':not',
    ':where'
}

# Supported, complex pseudo classes that match nothing in the Soup Sieve environment
PSEUDO_COMPLEX_NO_MATCH = {
    ':current',
    ':host',
    ':host-context'
}

# Complex pseudo classes that take very specific parameters and are handled special
PSEUDO_SPECIAL = {
    ':dir',
    ':lang',
    ':nth-child',
    ':nth-last-child',
    ':nth-last-of-type',
    ':nth-of-type'
}

PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSEUDO_COMPLEX_NO_MATCH | PSEUDO_SPECIAL

# Sub-patterns parts
# Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = r'(?:[ \t]|{})'.format(NEWLINE)
# Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS)
# CSS escapes
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS)
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE)
# CSS Identifier
IDENTIFIER = r'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*)
'''.format(esc=CSS_ESCAPES)
# `nth` content
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC)
# Value: quoted string or identifier
VALUE = r'''
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)

# Selector patterns
# IDs (`#id`)
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
# Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
# Prefix:Tag (`prefix|tag`)
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = r'''
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER)
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)
# At rule (`@page`, etc.) (not supported)
# NOTE(review): the literal `P` after `@` means only at-rules whose name starts with `p`/`P`
# are tokenized as "at_rule"; others fall through to the "Invalid character" error. This looks
# unintended, but it is left untouched because changing it alters which exception type is raised.
PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER)
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
PAT_PSEUDO_NTH_CHILD = r'''
(?P<pseudo_nth_child>{name}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH)
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = r'''
(?P<pseudo_nth_type>{name}
(?P<nth_type>{nth}|even|odd)){ws}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH)
# Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC)
# Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC)
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)

# Regular expressions
# CSS escape pattern
# Only plain whitespace (not comments) may terminate a hex escape; this mirrors `CSS_ESCAPES`
# and keeps the captured group parseable by `int(..., 16)` in `css_unescape`.
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WS), re.I)
RE_CSS_STR_ESC = re.compile(
    r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
# Pattern to break up `nth` specifiers
RE_NTH = re.compile(
    r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
    re.I
)
# Pattern to iterate multiple values.
RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile('^{}*'.format(WSC))
RE_WS_END = re.compile('{}*$'.format(WSC))
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X)

# Constants
# List split token
COMMA_COMBINATOR = ','
# Relation token for descendant
WS_COMBINATOR = " "

# Parse flags
FLG_PSEUDO = 0x01
FLG_NOT = 0x02
FLG_RELATIVE = 0x04
FLG_DEFAULT = 0x08
FLG_HTML = 0x10
FLG_INDETERMINATE = 0x20
FLG_OPEN = 0x40
FLG_IN_RANGE = 0x80
FLG_OUT_OF_RANGE = 0x100
FLG_PLACEHOLDER_SHOWN = 0x200

# Maximum cached patterns to store
_MAXCACHE = 500


@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(pattern, namespaces, custom, flags):
    """
    Cached CSS compile.

    Parses `pattern` and wraps the result in a `cm.SoupSieve` object. Because the
    function is memoized with `lru_cache`, every argument must be hashable.
    """

    custom_selectors = process_custom(custom)
    return cm.SoupSieve(
        pattern,
        CSSParser(pattern, custom=custom_selectors, flags=flags).process_selectors(),
        namespaces,
        custom,
        flags
    )


def _purge_cache():
    """Purge the cache."""

    _cached_css_compile.cache_clear()


def process_custom(custom):
    """
    Process custom.

    Validate the user supplied custom pseudo-class names and return a new dictionary
    keyed by the lower-cased, unescaped name.

    Raises `SelectorSyntaxError` if a name is not a valid custom pseudo-class name
    (`:--name`) and `KeyError` if two keys resolve to the same name.
    """

    custom_selectors = {}
    if custom is not None:
        for key, value in custom.items():
            name = util.lower(key)
            if RE_CUSTOM.match(name) is None:
                raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name))
            # Selectors are stored (and later looked up) by their unescaped name, so the
            # duplicate check must be done against the unescaped name as well.
            unescaped = css_unescape(name)
            if unescaped in custom_selectors:
                raise KeyError("The custom selector '{}' has already been registered".format(name))
            custom_selectors[unescaped] = value
    return custom_selectors


def css_unescape(content, string=False):
    """
    Unescape CSS value.

    Strings allow for spanning the value on multiple strings by escaping a new line.

    `content` is the raw text and `string` selects the quoted string rules (which
    additionally drop an escaped new line). Hex escapes that are zero, a surrogate,
    or beyond U+10FFFF are replaced with U+FFFD, as is a trailing lone backslash.
    """

    def replace(m):
        """Replace with the appropriate substitute."""

        if m.group(1):
            # Hex escape; `int` tolerates the optional trailing whitespace character.
            codepoint = int(m.group(1)[1:], 16)
            if codepoint == 0 or codepoint > 0x10FFFF or 0xD800 <= codepoint <= 0xDFFF:
                # NULL, surrogates, and out of range values are not valid code points.
                # (`chr` would raise `ValueError` for anything above U+10FFFF.)
                codepoint = UNICODE_REPLACEMENT_CHAR
            value = chr(codepoint)
        elif m.group(2):
            # Escaped literal character
            value = m.group(2)[1:]
        elif m.group(3):
            # Backslash at the end of the content
            value = '\ufffd'
        else:
            # Escaped new line within a string: line continuation
            value = ''

        return value

    return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)


def escape(ident):
    """
    Escape identifier.

    Return `ident` with every character that cannot appear literally in a CSS
    identifier escaped. NULL is replaced with U+FFFD.
    """

    string = []
    length = len(ident)
    start_dash = length > 0 and ident[0] == '-'
    if length == 1 and start_dash:
        # Need to escape identifier that is a single `-` with no other characters
        string.append('\\{}'.format(ident))
    else:
        for index, c in enumerate(ident):
            codepoint = ord(c)
            if codepoint == 0x00:
                string.append('\ufffd')
            elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
                # Control characters are written as hex escapes
                string.append('\\{:x} '.format(codepoint))
            elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39):
                # A digit cannot start an identifier (even after a leading `-`)
                string.append('\\{:x} '.format(codepoint))
            elif (
                codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or
                (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
            ):
                # `-`, `_`, non-ASCII, digits, and ASCII letters are safe as is
                string.append(c)
            else:
                string.append('\\{}'.format(c))
    return ''.join(string)


class SelectorPattern(object):
    """Selector pattern: a named token paired with its compiled regular expression."""

    def __init__(self, name, pattern):
        """Initialize with the token `name` and the (verbose, case insensitive) `pattern`."""

        self.name = name
        self.re_pattern = re.compile(pattern, re.I | re.X | re.U)

    def get_name(self):
        """Get name."""

        return self.name

    def match(self, selector, index, flags):
        """Match the selector at `index`; `flags` is accepted for interface parity and unused."""

        return self.re_pattern.match(selector, index)


class SpecialPseudoPattern(SelectorPattern):
    """
    Selector pattern for pseudo-classes that take special parameters.

    A quick name pattern is used to read the pseudo-class name, and then the
    pattern registered for that specific name (if any) is applied.
    """

    def __init__(self, patterns):
        """
        Initialize.

        `patterns` is an iterable of `(token name, pseudo-class names, pattern, pattern class)`.
        """

        self.patterns = {}
        for p in patterns:
            name = p[0]
            pattern = p[3](name, p[2])
            for pseudo in p[1]:
                self.patterns[pseudo] = pattern

        # Pattern object of the most recent successful match; read by `get_name`.
        self.matched_name = None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

    def get_name(self):
        """Get name of the pattern that matched last (only valid after a successful `match`)."""

        return self.matched_name.get_name()

    def match(self, selector, index, flags):
        """Match the selector."""

        pseudo = None
        m = self.re_pseudo_name.match(selector, index)
        if m:
            name = util.lower(css_unescape(m.group('name')))
            pattern = self.patterns.get(name)
            if pattern:
                pseudo = pattern.match(selector, index, flags)
                if pseudo:
                    self.matched_name = pattern

        return pseudo


class _Selector(object):
    """
    Intermediate selector class.

    This stores selector data for a compound selector as we are acquiring them.
    Once we are done collecting the data for a compound selector, we freeze
    the data in an object that can be pickled and hashed.
    """

    def __init__(self, **kwargs):
        """Initialize."""

        self.tag = kwargs.get('tag', None)
        self.ids = kwargs.get('ids', [])
        self.classes = kwargs.get('classes', [])
        self.attributes = kwargs.get('attributes', [])
        self.nth = kwargs.get('nth', [])
        self.selectors = kwargs.get('selectors', [])
        self.relations = kwargs.get('relations', [])
        self.rel_type = kwargs.get('rel_type', None)
        self.contains = kwargs.get('contains', [])
        self.lang = kwargs.get('lang', [])
        self.flags = kwargs.get('flags', 0)
        self.no_match = kwargs.get('no_match', False)

    def _freeze_relations(self, relations):
        """
        Freeze relation.

        The first relation absorbs the remaining ones (mutating it in place) and is
        then frozen into a single item selector list.
        """

        if relations:
            sel = relations[0]
            sel.relations.extend(relations[1:])
            return ct.SelectorList([sel.freeze()])
        else:
            return ct.SelectorList()

    def freeze(self):
        """Freeze self."""

        if self.no_match:
            return ct.SelectorNull()
        else:
            return ct.Selector(
                self.tag,
                tuple(self.ids),
                tuple(self.classes),
                tuple(self.attributes),
                tuple(self.nth),
                tuple(self.selectors),
                self._freeze_relations(self.relations),
                self.rel_type,
                tuple(self.contains),
                tuple(self.lang),
                self.flags
            )

    def __str__(self):  # pragma: no cover
        """String representation."""

        return (
            '_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, '
            'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
        ).format(
            self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors,
            self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
        )

    __repr__ = __str__


class CSSParser(object):
    """Parse CSS selectors."""

    # Token patterns, tried in order at each position of the selector string.
    css_tokens = (
        SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
        SpecialPseudoPattern(
            (
                ("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS, SelectorPattern),
                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
            )
        ),
        SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
        SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
        SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
        SelectorPattern("at_rule", PAT_AT_RULE),
        SelectorPattern("id", PAT_ID),
        SelectorPattern("class", PAT_CLASS),
        SelectorPattern("tag", PAT_TAG),
        SelectorPattern("attribute", PAT_ATTR),
        SelectorPattern("combine", PAT_COMBINE)
    )

    def __init__(self, selector, custom=None, flags=0):
        """
        Initialize.

        `selector` is the selector string (NULL characters are replaced with U+FFFD),
        `custom` a mapping of custom pseudo-class names to selectors, and `flags`
        the user flags (only `util.DEBUG` is inspected here).
        """

        self.pattern = selector.replace('\x00', '\ufffd')
        self.flags = flags
        self.debug = self.flags & util.DEBUG
        self.custom = {} if custom is None else custom

    def parse_attribute_selector(self, sel, m, has_selector):
        """Create attribute selector from the returned regex match."""

        inverse = False
        op = m.group('cmp')
        case = util.lower(m.group('case')) if m.group('case') else None
        ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
        attr = css_unescape(m.group('attr_name'))
        is_type = False
        pattern2 = None

        if case:
            flags = re.I if case == 'i' else 0
        elif util.lower(attr) == 'type':
            # `type` is compared case insensitively unless a case flag says otherwise
            flags = re.I
            is_type = True
        else:
            flags = 0

        if op:
            if m.group('value').startswith(('"', "'")):
                value = css_unescape(m.group('value')[1:-1], True)
            else:
                value = css_unescape(m.group('value'))
        else:
            value = None
        if not op:
            # Attribute name
            pattern = None
        elif op.startswith('^'):
            # Value start with
            pattern = re.compile(r'^%s.*' % re.escape(value), flags)
        elif op.startswith('$'):
            # Value ends with
            pattern = re.compile(r'.*?%s$' % re.escape(value), flags)
        elif op.startswith('*'):
            # Value contains
            pattern = re.compile(r'.*?%s.*' % re.escape(value), flags)
        elif op.startswith('~'):
            # Value contains word within space separated list
            # `~=` should match nothing if it is empty or contains whitespace,
            # so if either of these cases is present, use `[^\s\S]` which cannot be matched.
            value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value)
            pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags)
        elif op.startswith('|'):
            # Value starts with word in dash separated list
            pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
        else:
            # Value matches
            pattern = re.compile(r'^%s$' % re.escape(value), flags)
            if op.startswith('!'):
                # Equivalent to `:not([attr=value])`
                inverse = True
        if is_type and pattern:
            # Second, case sensitive variant of the `type` pattern
            pattern2 = re.compile(pattern.pattern)

        # Append the attribute selector
        sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2)
        if inverse:
            # If we are using `!=`, we need to nest the pattern under a `:not()`.
            sub_sel = _Selector()
            sub_sel.attributes.append(sel_attr)
            not_list = ct.SelectorList([sub_sel.freeze()], True, False)
            sel.selectors.append(not_list)
        else:
            sel.attributes.append(sel_attr)

        has_selector = True
        return has_selector

    def parse_tag_pattern(self, sel, m, has_selector):
        """Parse tag pattern from regex match."""

        prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
        tag = css_unescape(m.group('tag_name'))
        sel.tag = ct.SelectorTag(tag, prefix)
        has_selector = True
        return has_selector

    def parse_pseudo_class_custom(self, sel, m, has_selector):
        """
        Parse custom pseudo class alias.

        Compile custom selectors as we need them. When compiling a custom selector,
        set it to `None` in the dictionary so we can avoid an infinite loop.
        """

        pseudo = util.lower(css_unescape(m.group('name')))
        selector = self.custom.get(pseudo)
        if selector is None:
            raise SelectorSyntaxError(
                "Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)),
                self.pattern,
                m.end(0)
            )

        if not isinstance(selector, ct.SelectorList):
            # Still raw text: compile it now and cache the compiled list.
            self.custom[pseudo] = None
            selector = CSSParser(
                selector, custom=self.custom, flags=self.flags
            ).process_selectors(flags=FLG_PSEUDO)
            self.custom[pseudo] = selector

        sel.selectors.append(selector)
        has_selector = True
        return has_selector

    def parse_pseudo_class(self, sel, m, has_selector, iselector, is_html):
        """
        Parse pseudo class.

        Returns the updated `(has_selector, is_html)` pair. Raises `SelectorSyntaxError`
        when a supported pseudo-class is used with the wrong form (with or without
        parenthesis) and `NotImplementedError` for unknown pseudo-classes.
        """

        complex_pseudo = False
        pseudo = util.lower(css_unescape(m.group('name')))
        if m.group('open'):
            complex_pseudo = True
        if complex_pseudo and pseudo in PSEUDO_COMPLEX:
            has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0))
        elif not complex_pseudo and pseudo in PSEUDO_SIMPLE:
            if pseudo == ':root':
                sel.flags |= ct.SEL_ROOT
            elif pseudo == ':defined':
                sel.flags |= ct.SEL_DEFINED
                is_html = True
            elif pseudo == ':scope':
                sel.flags |= ct.SEL_SCOPE
            elif pseudo == ':empty':
                sel.flags |= ct.SEL_EMPTY
            elif pseudo in (':link', ':any-link'):
                sel.selectors.append(CSS_LINK)
            elif pseudo == ':checked':
                sel.selectors.append(CSS_CHECKED)
            elif pseudo == ':default':
                sel.selectors.append(CSS_DEFAULT)
            elif pseudo == ':indeterminate':
                sel.selectors.append(CSS_INDETERMINATE)
            elif pseudo == ":disabled":
                sel.selectors.append(CSS_DISABLED)
            elif pseudo == ":enabled":
                sel.selectors.append(CSS_ENABLED)
            elif pseudo == ":required":
                sel.selectors.append(CSS_REQUIRED)
            elif pseudo == ":optional":
                sel.selectors.append(CSS_OPTIONAL)
            elif pseudo == ":read-only":
                sel.selectors.append(CSS_READ_ONLY)
            elif pseudo == ":read-write":
                sel.selectors.append(CSS_READ_WRITE)
            elif pseudo == ":in-range":
                sel.selectors.append(CSS_IN_RANGE)
            elif pseudo == ":out-of-range":
                sel.selectors.append(CSS_OUT_OF_RANGE)
            elif pseudo == ":placeholder-shown":
                sel.selectors.append(CSS_PLACEHOLDER_SHOWN)
            elif pseudo == ':first-child':
                sel.nth.append(ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()))
            elif pseudo == ':last-child':
                sel.nth.append(ct.SelectorNth(1, False, 0, False, True, ct.SelectorList()))
            elif pseudo == ':first-of-type':
                sel.nth.append(ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()))
            elif pseudo == ':last-of-type':
                sel.nth.append(ct.SelectorNth(1, False, 0, True, True, ct.SelectorList()))
            elif pseudo == ':only-child':
                # Both the first and the last child
                sel.nth.extend(
                    [
                        ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()),
                        ct.SelectorNth(1, False, 0, False, True, ct.SelectorList())
                    ]
                )
            elif pseudo == ':only-of-type':
                # Both the first and the last of its type
                sel.nth.extend(
                    [
                        ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()),
                        ct.SelectorNth(1, False, 0, True, True, ct.SelectorList())
                    ]
                )
            has_selector = True
        elif complex_pseudo and pseudo in PSEUDO_COMPLEX_NO_MATCH:
            # Consume (and validate) the content, but the selector can never match.
            self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
            sel.no_match = True
            has_selector = True
        elif not complex_pseudo and pseudo in PSEUDO_SIMPLE_NO_MATCH:
            sel.no_match = True
            has_selector = True
        elif pseudo in PSEUDO_SUPPORTED:
            raise SelectorSyntaxError(
                "Invalid syntax for pseudo class '{}'".format(pseudo),
                self.pattern,
                m.start(0)
            )
        else:
            raise NotImplementedError(
                "'{}' pseudo-class is not implemented at this time".format(pseudo)
            )

        return has_selector, is_html

    def parse_pseudo_nth(self, sel, m, has_selector, iselector):
        """
        Parse `nth` pseudo.

        Breaks `an+b`, `even`, or `odd` into the integer pair `(a, b)` plus a flag noting
        whether `n` was present, and appends the matching `ct.SelectorNth` to `sel.nth`.
        """

        mdict = m.groupdict()
        if mdict.get('pseudo_nth_child'):
            postfix = '_child'
        else:
            postfix = '_type'
        mdict['name'] = util.lower(css_unescape(mdict['name']))
        content = util.lower(mdict.get('nth' + postfix))
        if content == 'even':
            # 2n
            s1 = 2
            s2 = 0
            var = True
        elif content == 'odd':
            # 2n+1
            s1 = 2
            s2 = 1
            var = True
        else:
            nth_parts = RE_NTH.match(content)
            s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else ''
            a = nth_parts.group('a')
            var = a.endswith('n')
            if a.startswith('n'):
                # Bare `n` implies a coefficient of 1
                s1 += '1'
            elif var:
                s1 += a[:-1]
            else:
                s1 += a
            s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else ''
            if nth_parts.group('b'):
                s2 += nth_parts.group('b')
            else:
                s2 = '0'
            s1 = int(s1, 10)
            s2 = int(s2, 10)

        pseudo_sel = mdict['name']
        if postfix == '_child':
            if m.group('of'):
                # Parse the rest of `of S`.
                nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
            else:
                # Use default `*|*` for `of S`.
                nth_sel = CSS_NTH_OF_S_DEFAULT
            if pseudo_sel == ':nth-child':
                sel.nth.append(ct.SelectorNth(s1, var, s2, False, False, nth_sel))
            elif pseudo_sel == ':nth-last-child':
                sel.nth.append(ct.SelectorNth(s1, var, s2, False, True, nth_sel))
        else:
            if pseudo_sel == ':nth-of-type':
                sel.nth.append(ct.SelectorNth(s1, var, s2, True, False, ct.SelectorList()))
            elif pseudo_sel == ':nth-last-of-type':
                sel.nth.append(ct.SelectorNth(s1, var, s2, True, True, ct.SelectorList()))
        has_selector = True
        return has_selector

    def parse_pseudo_open(self, sel, name, has_selector, iselector, index):
        """Parse pseudo with opening bracket."""

        flags = FLG_PSEUDO | FLG_OPEN
        if name == ':not':
            flags |= FLG_NOT
        if name == ':has':
            flags |= FLG_RELATIVE

        sel.selectors.append(self.parse_selectors(iselector, index, flags))
        has_selector = True
        return has_selector

    def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index):
        """
        Parse combinator tokens within a relative (`:has()`) selector list.

        Returns `(has_selector, sel, rel_type)` where `sel` is a fresh selector and
        `rel_type` is the leading combinator to apply to it.
        """

        combinator = m.group('relation').strip()
        if not combinator:
            combinator = WS_COMBINATOR
        if combinator == COMMA_COMBINATOR:
            if not has_selector:
                # If we've not captured any selector parts, the comma is either at the beginning of the pattern
                # or following another comma, both of which are unexpected. Commas must split selectors.
                raise SelectorSyntaxError(
                    "The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
                    self.pattern,
                    index
                )
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
            rel_type = ":" + WS_COMBINATOR
            selectors.append(_Selector())
        else:
            if has_selector:
                # End the current selector and associate the leading combinator with this selector.
                sel.rel_type = rel_type
                selectors[-1].relations.append(sel)
            elif rel_type[1:] != WS_COMBINATOR:
                # It's impossible to have two whitespace combinators after each other as the patterns
                # will gobble up trailing whitespace. It is also impossible to have a whitespace
                # combinator after any other kind for the same reason. But we could have
                # multiple non-whitespace combinators. So if the current combinator is not a whitespace,
                # then we've hit the multiple combinator case, so we should fail.
                raise SelectorSyntaxError(
                    'The multiple combinators at position {}'.format(index),
                    self.pattern,
                    index
                )
            # Set the leading combinator for the next selector.
            rel_type = ':' + combinator
        sel = _Selector()

        has_selector = False
        return has_selector, sel, rel_type

    def parse_combinator(self, sel, m, has_selector, selectors, relations, is_pseudo, index):
        """
        Parse combinator tokens.

        Returns `(has_selector, sel)` where `sel` is a fresh selector for the next
        compound selector. Raises `SelectorSyntaxError` if no selector precedes the combinator.
        """

        combinator = m.group('relation').strip()
        if not combinator:
            combinator = WS_COMBINATOR
        if not has_selector:
            raise SelectorSyntaxError(
                "The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
                self.pattern,
                index
            )

        if combinator == COMMA_COMBINATOR:
            if not sel.tag and not is_pseudo:
                # Implied `*`
                sel.tag = ct.SelectorTag('*', None)
            sel.relations.extend(relations)
            selectors.append(sel)
            del relations[:]
        else:
            sel.relations.extend(relations)
            sel.rel_type = combinator
            del relations[:]
            relations.append(sel)
        sel = _Selector()

        has_selector = False
        return has_selector, sel

    def parse_class_id(self, sel, m, has_selector):
        """Parse HTML classes and ids."""

        selector = m.group(0)
        if selector.startswith('.'):
            sel.classes.append(css_unescape(selector[1:]))
        else:
            sel.ids.append(css_unescape(selector[1:]))
        has_selector = True
        return has_selector

    def parse_pseudo_contains(self, sel, m, has_selector):
        """Parse contains."""

        values = m.group('values')
        patterns = []
        for token in RE_VALUES.finditer(values):
            if token.group('split'):
                continue
            value = token.group('value')
            if value.startswith(("'", '"')):
                value = css_unescape(value[1:-1], True)
            else:
                value = css_unescape(value)
            patterns.append(value)
        sel.contains.append(ct.SelectorContains(tuple(patterns)))
        has_selector = True
        return has_selector

    def parse_pseudo_lang(self, sel, m, has_selector):
        """Parse pseudo language."""

        values = m.group('values')
        patterns = []
        for token in RE_VALUES.finditer(values):
            if token.group('split'):
                continue
            value = token.group('value')
            if value.startswith(('"', "'")):
                value = css_unescape(value[1:-1], True)
            else:
                value = css_unescape(value)

            patterns.append(value)

        sel.lang.append(ct.SelectorLang(patterns))
        has_selector = True

        return has_selector

    def parse_pseudo_dir(self, sel, m, has_selector):
        """Parse pseudo direction."""

        value = ct.SEL_DIR_LTR if util.lower(m.group('dir')) == 'ltr' else ct.SEL_DIR_RTL
        sel.flags |= value
        has_selector = True
        return has_selector

    def parse_selectors(self, iselector, index=0, flags=0):
        """
        Parse selectors.

        Consumes `(token name, match)` pairs from `iselector` until it is exhausted, or,
        when `FLG_OPEN` is set, until the closing parenthesis of the current pseudo-class.
        `index` is only used for error reporting and `flags` is a combination of the
        `FLG_*` parse flags. Returns a `ct.SelectorList`.
        """

        sel = _Selector()
        selectors = []
        has_selector = False
        closed = False
        relations = []
        rel_type = ":" + WS_COMBINATOR
        is_open = bool(flags & FLG_OPEN)
        is_pseudo = bool(flags & FLG_PSEUDO)
        is_relative = bool(flags & FLG_RELATIVE)
        is_not = bool(flags & FLG_NOT)
        is_html = bool(flags & FLG_HTML)
        is_default = bool(flags & FLG_DEFAULT)
        is_indeterminate = bool(flags & FLG_INDETERMINATE)
        is_in_range = bool(flags & FLG_IN_RANGE)
        is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
        is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)

        if self.debug:  # pragma: no cover
            if is_pseudo:
                print(' is_pseudo: True')
            if is_open:
                print(' is_open: True')
            if is_relative:
                print(' is_relative: True')
            if is_not:
                print(' is_not: True')
            if is_html:
                print(' is_html: True')
            if is_default:
                print(' is_default: True')
            if is_indeterminate:
                print(' is_indeterminate: True')
            if is_in_range:
                print(' is_in_range: True')
            if is_out_of_range:
                print(' is_out_of_range: True')
            if is_placeholder_shown:
                print(' is_placeholder_shown: True')

        if is_relative:
            # Relative selectors hang their parts off of an empty anchor selector.
            selectors.append(_Selector())

        try:
            while True:
                key, m = next(iselector)

                # Handle parts
                if key == "at_rule":
                    raise NotImplementedError("At-rules found at position {}".format(m.start(0)))
                elif key == 'pseudo_class_custom':
                    has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
                elif key == 'pseudo_class':
                    has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
                elif key == 'pseudo_element':
                    raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
                elif key == 'pseudo_contains':
                    has_selector = self.parse_pseudo_contains(sel, m, has_selector)
                elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
                    has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
                elif key == 'pseudo_lang':
                    has_selector = self.parse_pseudo_lang(sel, m, has_selector)
                elif key == 'pseudo_dir':
                    has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                    # Currently only supports HTML
                    is_html = True
                elif key == 'pseudo_close':
                    if not has_selector:
                        raise SelectorSyntaxError(
                            "Expected a selector at position {}".format(m.start(0)),
                            self.pattern,
                            m.start(0)
                        )
                    if is_open:
                        closed = True
                        break
                    else:
                        raise SelectorSyntaxError(
                            "Unmatched pseudo-class close at position {}".format(m.start(0)),
                            self.pattern,
                            m.start(0)
                        )
                elif key == 'combine':
                    if is_relative:
                        has_selector, sel, rel_type = self.parse_has_combinator(
                            sel, m, has_selector, selectors, rel_type, index
                        )
                    else:
                        has_selector, sel = self.parse_combinator(
                            sel, m, has_selector, selectors, relations, is_pseudo, index
                        )
                elif key == 'attribute':
                    has_selector = self.parse_attribute_selector(sel, m, has_selector)
                elif key == 'tag':
                    if has_selector:
                        raise SelectorSyntaxError(
                            "Tag name found at position {} instead of at the start".format(m.start(0)),
                            self.pattern,
                            m.start(0)
                        )
                    has_selector = self.parse_tag_pattern(sel, m, has_selector)
                elif key in ('class', 'id'):
                    has_selector = self.parse_class_id(sel, m, has_selector)

                index = m.end(0)
        except StopIteration:
            # Token stream exhausted
            pass

        if is_open and not closed:
            raise SelectorSyntaxError(
                "Unclosed pseudo-class at position {}".format(index),
                self.pattern,
                index
            )

        if has_selector:
            if not sel.tag and not is_pseudo:
                # Implied `*`
                sel.tag = ct.SelectorTag('*', None)
            if is_relative:
                sel.rel_type = rel_type
                selectors[-1].relations.append(sel)
            else:
                sel.relations.extend(relations)
                del relations[:]
                selectors.append(sel)
        else:
            # We will always need to finish a selector when `:has()` is used as it leads with combining.
            raise SelectorSyntaxError(
                'Expected a selector at position {}'.format(index),
                self.pattern,
                index
            )

        # Some patterns require additional logic, such as default. We try to make these the
        # last pattern, and append the appropriate flag to that selector which communicates
        # to the matcher what additional logic is required.
        if is_default:
            selectors[-1].flags = ct.SEL_DEFAULT
        if is_indeterminate:
            selectors[-1].flags = ct.SEL_INDETERMINATE
        if is_in_range:
            selectors[-1].flags = ct.SEL_IN_RANGE
        if is_out_of_range:
            selectors[-1].flags = ct.SEL_OUT_OF_RANGE
        if is_placeholder_shown:
            selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN

        return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)

    def selector_iter(self, pattern):
        """
        Iterate selector tokens.

        Yields `(token name, match)` pairs and raises `SelectorSyntaxError` at the
        first position where no token pattern matches.
        """

        # Ignore whitespace and comments at start and end of pattern
        m = RE_WS_BEGIN.search(pattern)
        index = m.end(0) if m else 0
        m = RE_WS_END.search(pattern)
        end = (m.start(0) - 1) if m else (len(pattern) - 1)

        if self.debug:  # pragma: no cover
            print('## PARSING: {!r}'.format(pattern))
        while index <= end:
            m = None
            for v in self.css_tokens:
                m = v.match(pattern, index, self.flags)
                if m:
                    name = v.get_name()
                    if self.debug:  # pragma: no cover
                        print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0)))
                    index = m.end(0)
                    yield name, m
                    break
            if m is None:
                c = pattern[index]
                # If the character represents the start of one of the known selector types,
                # throw an exception mentioning that the known selector type is in error;
                # otherwise, report the invalid character.
                if c == '[':
                    msg = "Malformed attribute selector at position {}".format(index)
                elif c == '.':
                    msg = "Malformed class selector at position {}".format(index)
                elif c == '#':
                    msg = "Malformed id selector at position {}".format(index)
                elif c == ':':
                    msg = "Malformed pseudo-class selector at position {}".format(index)
                else:
                    msg = "Invalid character {!r} position {}".format(c, index)
                raise SelectorSyntaxError(msg, self.pattern, index)
        if self.debug:  # pragma: no cover
            print('## END PARSING')

    def process_selectors(self, index=0, flags=0):
        """Process selectors: tokenize `self.pattern` and parse it into a selector list."""

        return self.parse_selectors(self.selector_iter(self.pattern), index, flags)


# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern)
# A few patterns are order dependent as they use patterns previous compiled.

# CSS pattern for `:link` and `:any-link`
CSS_LINK = CSSParser(
    'html|*:is(a, area, link)[href]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:checked`
CSS_CHECKED = CSSParser(
    '''
    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:default` (must compile CSS_CHECKED first)
CSS_DEFAULT = CSSParser(
    '''
    :checked,

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|form html|*:is(button, input)[type="submit"]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT)
# CSS pattern for `:indeterminate`
CSS_INDETERMINATE = CSSParser(
    '''
    html|input[type="checkbox"][indeterminate],
    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
    html|progress:not([value]),

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|input[type="radio"][name][name!='']:not([checked])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
# CSS pattern for `:disabled`
CSS_DISABLED = CSSParser(
    '''
    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
    html|optgroup[disabled] > html|option,
    html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
    html|fieldset[disabled] >
        html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled`
CSS_ENABLED = CSSParser(
    '''
    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:required`
CSS_REQUIRED = CSSParser(
    'html|*:is(input, textarea, select)[required]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:optional`
CSS_OPTIONAL = CSSParser(
    'html|*:is(input, textarea, select):not([required])'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:placeholder-shown`
CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
    html|input:is(
        :not([type]),
        [type=""],
        [type=text],
        [type=search],
        [type=url],
        [type=tel],
        [type=email],
        [type=password],
        [type=number]
    )[placeholder][placeholder!='']:is(:not([value]), [value=""]),
    html|textarea[placeholder][placeholder!='']
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
# CSS pattern default for `:nth-child` "of S" feature
CSS_NTH_OF_S_DEFAULT = CSSParser(
    '*|*'
).process_selectors(flags=FLG_PSEUDO)
# CSS pattern for `:read-write` (CSS_DISABLED must be compiled first)
CSS_READ_WRITE = CSSParser(
    '''
    html|*:is(
        textarea,
        input:is(
            :not([type]),
            [type=""],
            [type=text],
            [type=search],
            [type=url],
            [type=tel],
            [type=email],
            [type=number],
            [type=password],
            [type=date],
            [type=datetime-local],
            [type=month],
            [type=time],
            [type=week]
        )
    ):not([readonly], :disabled),
    html|*:is([contenteditable=""], [contenteditable="true" i])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:read-only`
CSS_READ_ONLY = CSSParser(
    '''
    html|*:not(:read-write)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:in-range`
CSS_IN_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML)
# CSS pattern for `:out-of-range`
CSS_OUT_OF_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML)