Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/soupsieve/css_parser.py
811 views
1
"""CSS selector parser."""
2
import re
3
from functools import lru_cache
4
from . import util
5
from . import css_match as cm
6
from . import css_types as ct
7
from .util import SelectorSyntaxError
8
9
# Code point of the Unicode replacement character (U+FFFD)
UNICODE_REPLACEMENT_CHAR = 0xFFFD

# Pseudo-classes written without parentheses
PSEUDO_SIMPLE = {
    ':any-link',
    ':checked',
    ':default',
    ':defined',
    ':disabled',
    ':empty',
    ':enabled',
    ':first-child',
    ':first-of-type',
    ':in-range',
    ':indeterminate',
    ':last-child',
    ':last-of-type',
    ':link',
    ':only-child',
    ':only-of-type',
    ':optional',
    ':out-of-range',
    ':placeholder-shown',
    ':read-only',
    ':read-write',
    ':required',
    ':root',
    ':scope',
}

# Parameterless pseudo-classes that are accepted but can never match in the Soup Sieve environment
PSEUDO_SIMPLE_NO_MATCH = {
    ':active',
    ':current',
    ':focus',
    ':focus-visible',
    ':focus-within',
    ':future',
    ':host',
    ':hover',
    ':local-link',
    ':past',
    ':paused',
    ':playing',
    ':target',
    ':target-within',
    ':user-invalid',
    ':visited',
}

# Pseudo-classes whose parentheses hold a selector list
PSEUDO_COMPLEX = {
    ':contains',
    ':has',
    ':is',
    ':matches',
    ':not',
    ':where',
}

# Parenthesized pseudo-classes that are accepted but can never match
PSEUDO_COMPLEX_NO_MATCH = {
    ':current',
    ':host',
    ':host-context',
}

# Pseudo-classes with their own argument grammar; each has a dedicated pattern and parser
PSEUDO_SPECIAL = {
    ':dir',
    ':lang',
    ':nth-child',
    ':nth-last-child',
    ':nth-last-of-type',
    ':nth-of-type',
}

# Every pseudo-class name the parser knows about, in any form
PSEUDO_SUPPORTED = (
    PSEUDO_SIMPLE
    | PSEUDO_SIMPLE_NO_MATCH
    | PSEUDO_COMPLEX
    | PSEUDO_COMPLEX_NO_MATCH
    | PSEUDO_SPECIAL
)
86
87
# Sub-patterns parts
88
# Whitespace
89
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
90
WS = r'(?:[ \t]|{})'.format(NEWLINE)
91
# Comments
92
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
93
# Whitespace with comments included
94
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS)
95
# CSS escapes
96
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS)
97
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE)
98
# CSS Identifier
99
IDENTIFIER = r'''
100
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--)
101
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*)
102
'''.format(esc=CSS_ESCAPES)
103
# `nth` content
104
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC)
105
# Value: quoted string or identifier
106
VALUE = r'''
107
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
108
'''.format(nl=NEWLINE, ident=IDENTIFIER)
109
# Attribute value comparison. `!=` is handled special as it is non-standard.
110
ATTR = r'''
111
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
112
'''.format(ws=WSC, value=VALUE)
113
114
# Selector patterns
115
# IDs (`#id`)
116
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
117
# Classes (`.class`)
118
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
119
# Prefix:Tag (`prefix|tag`)
120
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
121
# Attributes (`[attr]`, `[attr=value]`, etc.)
122
PAT_ATTR = r'''
123
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
124
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
125
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
126
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
127
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
128
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER)
129
# Custom pseudo class (`:--custom-pseudo`)
130
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
131
# Closing pseudo group (`)`)
132
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
133
# Pseudo element (`::pseudo-element`)
134
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)
135
# At rule (`@page`, `@media`, etc.) (not supported)
# The identifier must follow the `@` directly. A literal `P` between them would restrict
# recognition to names starting with `p`, and every other at-rule would be reported as an
# invalid `@` character instead of as an unsupported at-rule.
PAT_AT_RULE = r'@{ident}'.format(ident=IDENTIFIER)
137
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
# Ends either at the closing `)` or right after ` of `; in the latter case the `of` group is set
# and the selector list that follows is left for the parser to consume.
PAT_PSEUDO_NTH_CHILD = r'''
(?P<pseudo_nth_child>{name}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH)
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = r'''
(?P<pseudo_nth_type>{name}
(?P<nth_type>{nth}|even|odd)){ws}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH)
# Pseudo class language (`:lang("*-de", en)`): a comma separated list of values
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC)
# Combining characters (`>`, `~`, ` `, `+`, `,`)
# Whitespace only counts as a combinator when no explicit combinator follows it.
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC)
# Extra: Contains (`:contains(text)`): a comma separated list of values, same shape as `:lang()`
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
159
160
# Regular expressions
# CSS escape pattern
# Group 1: hex escape (with its optional trailing whitespace), group 2: escaped character,
# group 3: backslash at end of input; the string variant adds group 4: escaped newline.
# Only plain whitespace (`WS`) may be swallowed after a hex escape, matching `CSS_ESCAPES` and
# `RE_CSS_STR_ESC`. Allowing comments (`WSC`) there would hand `css_unescape` text that `int()`
# cannot parse as hexadecimal.
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WS), re.I)
RE_CSS_STR_ESC = re.compile(
    r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
# Pattern to break up `nth` specifiers into sign (`s1`), `a` term, sign (`s2`) and `b` term
RE_NTH = re.compile(
    r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
    re.I
)
# Pattern to iterate multiple values: each match is either a `value` or a `split` (comma)
RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile('^{}*'.format(WSC))
RE_WS_END = re.compile('{}*$'.format(WSC))
# Validates a complete custom pseudo-class name (`:--name`)
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X)
178
179
# Constants
# Token that separates the selectors of a selector list
COMMA_COMBINATOR = ','
# Token used for the descendant relation
WS_COMBINATOR = ' '

# Parse flags (bit mask; combine with `|`)
FLG_PSEUDO = 1 << 0
FLG_NOT = 1 << 1
FLG_RELATIVE = 1 << 2
FLG_DEFAULT = 1 << 3
FLG_HTML = 1 << 4
FLG_INDETERMINATE = 1 << 5
FLG_OPEN = 1 << 6
FLG_IN_RANGE = 1 << 7
FLG_OUT_OF_RANGE = 1 << 8
FLG_PLACEHOLDER_SHOWN = 1 << 9

# Upper bound on the number of compiled patterns kept in the cache
_MAXCACHE = 500
199
200
201
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(pattern, namespaces, custom, flags):
    """
    Compile a selector pattern, memoizing the result.

    The custom selectors are normalized with `process_custom`, the pattern is parsed with
    `CSSParser`, and everything is wrapped in a `cm.SoupSieve` object. Results are kept in an
    LRU cache, so every argument must be hashable.
    """

    parser = CSSParser(pattern, custom=process_custom(custom), flags=flags)
    selectors = parser.process_selectors()
    return cm.SoupSieve(pattern, selectors, namespaces, custom, flags)
213
214
215
def _purge_cache():
    """Purge the cache of compiled patterns held by `_cached_css_compile`."""

    _cached_css_compile.cache_clear()
219
220
221
def process_custom(custom):
    """
    Validate and normalize a mapping of custom pseudo-class names.

    `custom` maps names such as `:--my-alias` to selectors, or is `None`. Returns a new
    dictionary keyed by the normalized name.

    Names are normalized the same way the parser normalizes them when a custom pseudo-class is
    looked up: CSS escapes are resolved, then the result is lowercased. Two spellings that
    normalize to the same name are treated as a duplicate registration.

    Raises `SelectorSyntaxError` for a name that is not a valid custom pseudo-class name and
    `KeyError` when a name is registered more than once.
    """

    custom_selectors = {}
    if custom is not None:
        for key, value in custom.items():
            name = util.lower(key)
            if RE_CUSTOM.match(name) is None:
                raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name))
            # Lowercase again after unescaping: a hex escape can decode to an uppercase letter,
            # and lookups lowercase the unescaped name.
            normalized = util.lower(css_unescape(name))
            # Check the key that is actually stored, so escaped and plain spellings collide.
            if normalized in custom_selectors:
                raise KeyError("The custom selector '{}' has already been registered".format(name))
            custom_selectors[normalized] = value
    return custom_selectors
234
235
236
def css_unescape(content, string=False):
    """
    Unescape CSS value.

    Resolves backslash escapes in `content` and returns the resulting text. When `string` is
    true the string rules apply: strings allow for spanning the value on multiple lines by
    escaping a new line, and such an escaped new line is removed.

    Hex escapes that decode to zero, to a surrogate, or to a value beyond the last Unicode code
    point are replaced with U+FFFD, as is a lone backslash at the end of the input.
    """

    def replace(m):
        """Replace with the appropriate substitute."""

        if m.group(1):
            # Hex escape; `int` tolerates the optional trailing whitespace in the match.
            codepoint = int(m.group(1)[1:], 16)
            # `chr` would raise for values above 0x10FFFF and would produce a lone surrogate
            # for 0xD800-0xDFFF; CSS maps all of these (and zero) to the replacement character.
            if codepoint == 0 or 0xD800 <= codepoint <= 0xDFFF or codepoint > 0x10FFFF:
                codepoint = UNICODE_REPLACEMENT_CHAR
            value = chr(codepoint)
        elif m.group(2):
            # Escaped literal character: drop the backslash.
            value = m.group(2)[1:]
        elif m.group(3):
            # Backslash at end of input.
            value = '\ufffd'
        else:
            # Escaped newline inside a string (only the string pattern has this group).
            value = ''

        return value

    return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)
261
262
263
def escape(ident):
    """
    Escape identifier.

    Returns `ident` with every character that cannot appear literally in a CSS identifier
    escaped: NUL becomes U+FFFD, control characters and a leading digit (also a digit right
    after a leading `-`) become hex escapes, and any other character outside `-`, `_`, ASCII
    letters/digits and non-ASCII gets a backslash in front of it.
    """

    # An identifier that is a single `-` with no other characters must be escaped
    if ident == '-':
        return '\\-'

    leading_dash = ident.startswith('-')
    pieces = []
    for pos, char in enumerate(ident):
        cp = ord(char)
        is_digit = 0x30 <= cp <= 0x39
        at_start = pos == 0 or (leading_dash and pos == 1)
        if cp == 0x00:
            pieces.append('\ufffd')
        elif cp <= 0x1F or cp == 0x7F or (at_start and is_digit):
            # Hex escape, terminated by a space so following hex-like characters are not absorbed
            pieces.append('\\{:x} '.format(cp))
        elif (
            cp >= 0x80 or is_digit or cp in (0x2D, 0x5F) or
            (0x41 <= cp <= 0x5A) or (0x61 <= cp <= 0x7A)
        ):
            pieces.append(char)
        else:
            pieces.append('\\{}'.format(char))
    return ''.join(pieces)
289
290
291
class SelectorPattern(object):
    """A named token pattern: pairs a token name with its compiled regular expression."""

    def __init__(self, name, pattern):
        """Store the token name and compile `pattern` (case-insensitive, verbose, Unicode)."""

        self.name = name
        self.re_pattern = re.compile(pattern, re.IGNORECASE | re.VERBOSE | re.UNICODE)

    def get_name(self):
        """Return the token name."""

        return self.name

    def match(self, selector, index, flags):
        """Try to match the token in `selector` starting at `index`; `flags` is not used here."""

        return self.re_pattern.match(selector, index)
309
310
311
class SpecialPseudoPattern(SelectorPattern):
    """
    Dispatching pattern for pseudo-classes that have their own argument grammar.

    Reads the pseudo-class name first and then delegates to the pattern registered for that
    name. The pattern that matched last is remembered so `get_name` can report its token name.
    """

    def __init__(self, patterns):
        """
        Initialize.

        Each entry of `patterns` is `(token_name, pseudo_names, regex_source, pattern_class)`.
        One pattern object is built per entry and shared by all of its pseudo names.
        """

        self.patterns = {}
        for entry in patterns:
            token_name = entry[0]
            compiled = entry[3](token_name, entry[2])
            self.patterns.update((pseudo, compiled) for pseudo in entry[1])

        self.matched_name = None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

    def get_name(self):
        """Return the token name of the pattern that matched most recently."""

        return self.matched_name.get_name()

    def match(self, selector, index, flags):
        """Match `selector` at `index` with the pattern registered for the pseudo-class found there."""

        found = self.re_pseudo_name.match(selector, index)
        if not found:
            return None

        pattern = self.patterns.get(util.lower(css_unescape(found.group('name'))))
        if not pattern:
            return None

        pseudo = pattern.match(selector, index, flags)
        if pseudo:
            # Remember which pattern matched so `get_name` reports the right token.
            self.matched_name = pattern
        return pseudo
346
347
348
class _Selector(object):
    """
    Mutable scratch selector.

    Collects the pieces of one compound selector while it is being parsed. When parsing of the
    compound selector is finished, `freeze` converts the collected data into an object that
    can be pickled and hashed.
    """

    def __init__(self, **kwargs):
        """Initialize every field from `kwargs`, falling back to an empty default."""

        self.tag = kwargs.get('tag')
        # Each list field gets its own fresh list when not supplied.
        for field in ('ids', 'classes', 'attributes', 'nth', 'selectors', 'relations'):
            setattr(self, field, kwargs.get(field, []))
        self.rel_type = kwargs.get('rel_type')
        self.contains = kwargs.get('contains', [])
        self.lang = kwargs.get('lang', [])
        self.flags = kwargs.get('flags', 0)
        self.no_match = kwargs.get('no_match', False)

    def _freeze_relations(self, relations):
        """Chain `relations` under the first entry and freeze it into a selector list."""

        if not relations:
            return ct.SelectorList()

        head = relations[0]
        head.relations.extend(relations[1:])
        return ct.SelectorList([head.freeze()])

    def freeze(self):
        """Return the immutable counterpart of this selector."""

        if self.no_match:
            return ct.SelectorNull()

        return ct.Selector(
            self.tag,
            tuple(self.ids),
            tuple(self.classes),
            tuple(self.attributes),
            tuple(self.nth),
            tuple(self.selectors),
            self._freeze_relations(self.relations),
            self.rel_type,
            tuple(self.contains),
            tuple(self.lang),
            self.flags
        )

    def __str__(self):  # pragma: no cover
        """String representation."""

        template = (
            '_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, '
            'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
        )
        return template.format(
            self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors,
            self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
        )

    __repr__ = __str__
415
416
417
class CSSParser(object):
    """Parse CSS selectors."""

    # Token patterns, tried in this order at every position by `selector_iter`; the first
    # pattern that matches produces the token. The names given here are the keys that
    # `parse_selectors` dispatches on.
    css_tokens = (
        SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
        # Pseudo-classes with their own argument grammar share one dispatching pattern.
        SpecialPseudoPattern(
            (
                ("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS, SelectorPattern),
                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
            )
        ),
        SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
        SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
        SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
        SelectorPattern("at_rule", PAT_AT_RULE),
        SelectorPattern("id", PAT_ID),
        SelectorPattern("class", PAT_CLASS),
        SelectorPattern("tag", PAT_TAG),
        SelectorPattern("attribute", PAT_ATTR),
        SelectorPattern("combine", PAT_COMBINE)
    )
441
442
def __init__(self, selector, custom=None, flags=0):
    """
    Initialize.

    Stores the selector text (with NUL characters replaced by U+FFFD), the flags, whether
    debug output is enabled, and the custom selector mapping (an empty one when not given).
    """

    self.flags = flags
    self.debug = flags & util.DEBUG
    self.custom = custom if custom is not None else {}
    self.pattern = selector.replace('\x00', '\ufffd')
449
450
def parse_attribute_selector(self, sel, m, has_selector):
    """
    Create attribute selector from the returned regex match.

    Builds a `ct.SelectorAttribute` from the `attr_ns`, `attr_name`, `cmp`, `value` and `case`
    groups of `m` and attaches it to `sel`. The non-standard `!=` operator is stored as a
    nested negated selector list. Always returns `True` (a selector part was consumed).

    Value patterns are compiled with `re.DOTALL` so that `.` also spans newlines, which an
    attribute value may legitimately contain.
    """

    inverse = False
    op = m.group('cmp')
    case = util.lower(m.group('case')) if m.group('case') else None
    ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
    attr = css_unescape(m.group('attr_name'))
    is_type = False
    pattern2 = None

    if case:
        # Explicit `i` / `s` flag in the selector decides case sensitivity.
        flags = (re.I if case == 'i' else 0) | re.DOTALL
    elif util.lower(attr) == 'type':
        # `type` is compared case-insensitively by default; a case-sensitive twin of the
        # pattern is built below as `pattern2`.
        flags = re.I | re.DOTALL
        is_type = True
    else:
        flags = re.DOTALL

    if op:
        if m.group('value').startswith(('"', "'")):
            value = css_unescape(m.group('value')[1:-1], True)
        else:
            value = css_unescape(m.group('value'))
    else:
        value = None
    if not op:
        # Attribute name
        pattern = None
    elif op.startswith('^'):
        # Value start with
        pattern = re.compile(r'^%s.*' % re.escape(value), flags)
    elif op.startswith('$'):
        # Value ends with
        pattern = re.compile(r'.*?%s$' % re.escape(value), flags)
    elif op.startswith('*'):
        # Value contains
        pattern = re.compile(r'.*?%s.*' % re.escape(value), flags)
    elif op.startswith('~'):
        # Value contains word within space separated list
        # `~=` should match nothing if it is empty or contains whitespace,
        # so if either of these cases is present, use `[^\s\S]` which cannot be matched.
        value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value)
        pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags)
    elif op.startswith('|'):
        # Value starts with word in dash separated list
        pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
    else:
        # Value matches
        pattern = re.compile(r'^%s$' % re.escape(value), flags)
        if op.startswith('!'):
            # Equivalent to `:not([attr=value])`
            inverse = True
    if is_type and pattern:
        # Case-sensitive variant of the `type` pattern; keep the newline handling identical.
        pattern2 = re.compile(pattern.pattern, re.DOTALL)

    # Append the attribute selector
    sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2)
    if inverse:
        # If we are using `!=`, we need to nest the pattern under a `:not()`.
        sub_sel = _Selector()
        sub_sel.attributes.append(sel_attr)
        not_list = ct.SelectorList([sub_sel.freeze()], True, False)
        sel.selectors.append(not_list)
    else:
        sel.attributes.append(sel_attr)

    has_selector = True
    return has_selector
519
520
def parse_tag_pattern(self, sel, m, has_selector):
    """
    Parse tag pattern from regex match.

    Sets `sel.tag` from the `tag_ns` (namespace prefix, without its trailing `|`) and
    `tag_name` groups of `m`. The prefix is `None` when no namespace was written.
    Always returns `True`.
    """

    ns_token = m.group('tag_ns')
    prefix = css_unescape(ns_token[:-1]) if ns_token else None
    sel.tag = ct.SelectorTag(css_unescape(m.group('tag_name')), prefix)
    return True
528
529
def parse_pseudo_class_custom(self, sel, m, has_selector):
    """
    Parse custom pseudo class alias.

    Compile custom selectors as we need them. When compiling a custom selector,
    set it to `None` in the dictionary so we can avoid an infinite loop.

    Appends the compiled selector list for the alias to `sel.selectors` and returns `True`.
    Raises `SelectorSyntaxError` when the alias is not registered (or is referenced while it
    is itself still being compiled).
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    selector = self.custom.get(pseudo)
    if selector is None:
        raise SelectorSyntaxError(
            "Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)),
            self.pattern,
            m.end(0)
        )

    if not isinstance(selector, ct.SelectorList):
        # Still raw selector text: mark it as in progress, compile it, then cache the result.
        self.custom[pseudo] = None
        selector = CSSParser(
            selector, custom=self.custom, flags=self.flags
        ).process_selectors(flags=FLG_PSEUDO)
        self.custom[pseudo] = selector

    sel.selectors.append(selector)
    has_selector = True
    return has_selector
556
557
def parse_pseudo_class(self, sel, m, has_selector, iselector, is_html):
    """
    Parse pseudo class.

    Handles every pseudo-class that is not covered by a dedicated token: selector-list forms
    are handed to `parse_pseudo_open`, simple forms set a flag or append a precompiled selector
    or positional rule to `sel`, and the "never matches" forms mark `sel` as matching nothing.

    Returns `(has_selector, is_html)`. Raises `SelectorSyntaxError` for a known pseudo-class
    written in the wrong form and `NotImplementedError` for an unknown one.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    takes_args = bool(m.group('open'))

    if takes_args and pseudo in PSEUDO_COMPLEX:
        has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0))
    elif not takes_args and pseudo in PSEUDO_SIMPLE:
        # Positional pseudo-classes expressed as (of_type, last) pairs; the `only-*` forms are
        # the conjunction of the matching `first-*` and `last-*` rules.
        positional = {
            ':first-child': ((False, False),),
            ':last-child': ((False, True),),
            ':first-of-type': ((True, False),),
            ':last-of-type': ((True, True),),
            ':only-child': ((False, False), (False, True)),
            ':only-of-type': ((True, False), (True, True)),
        }
        if pseudo in positional:
            sel.nth.extend(
                ct.SelectorNth(1, False, 0, of_type, last, ct.SelectorList())
                for of_type, last in positional[pseudo]
            )
        elif pseudo == ':root':
            sel.flags |= ct.SEL_ROOT
        elif pseudo == ':defined':
            sel.flags |= ct.SEL_DEFINED
            is_html = True
        elif pseudo == ':scope':
            sel.flags |= ct.SEL_SCOPE
        elif pseudo == ':empty':
            sel.flags |= ct.SEL_EMPTY
        # The `CSS_*` selector lists are module globals looked up only in the branch that
        # needs them.
        elif pseudo in (':link', ':any-link'):
            sel.selectors.append(CSS_LINK)
        elif pseudo == ':checked':
            sel.selectors.append(CSS_CHECKED)
        elif pseudo == ':default':
            sel.selectors.append(CSS_DEFAULT)
        elif pseudo == ':indeterminate':
            sel.selectors.append(CSS_INDETERMINATE)
        elif pseudo == ":disabled":
            sel.selectors.append(CSS_DISABLED)
        elif pseudo == ":enabled":
            sel.selectors.append(CSS_ENABLED)
        elif pseudo == ":required":
            sel.selectors.append(CSS_REQUIRED)
        elif pseudo == ":optional":
            sel.selectors.append(CSS_OPTIONAL)
        elif pseudo == ":read-only":
            sel.selectors.append(CSS_READ_ONLY)
        elif pseudo == ":read-write":
            sel.selectors.append(CSS_READ_WRITE)
        elif pseudo == ":in-range":
            sel.selectors.append(CSS_IN_RANGE)
        elif pseudo == ":out-of-range":
            sel.selectors.append(CSS_OUT_OF_RANGE)
        elif pseudo == ":placeholder-shown":
            sel.selectors.append(CSS_PLACEHOLDER_SHOWN)
        has_selector = True
    elif takes_args and pseudo in PSEUDO_COMPLEX_NO_MATCH:
        # Consume (and validate) the argument list, then discard it: this can never match.
        self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
        sel.no_match = True
        has_selector = True
    elif not takes_args and pseudo in PSEUDO_SIMPLE_NO_MATCH:
        sel.no_match = True
        has_selector = True
    elif pseudo in PSEUDO_SUPPORTED:
        # Known name, but used with parentheses when it takes none (or the reverse).
        raise SelectorSyntaxError(
            "Invalid syntax for pseudo class '{}'".format(pseudo),
            self.pattern,
            m.start(0)
        )
    else:
        raise NotImplementedError(
            "'{}' pseudo-class is not implemented at this time".format(pseudo)
        )

    return has_selector, is_html
644
645
def parse_pseudo_nth(self, sel, m, has_selector, iselector):
    """
    Parse `nth` pseudo.

    Converts the `an+b` / `even` / `odd` argument of `:nth-child`, `:nth-last-child`,
    `:nth-of-type` or `:nth-last-of-type` into a `ct.SelectorNth` and appends it to `sel.nth`.
    For the `*-child` forms an `of S` clause, when present, is parsed from `iselector`.
    Always returns `True`.
    """

    groups = m.groupdict()
    child_form = bool(groups.get('pseudo_nth_child'))
    pseudo_sel = util.lower(css_unescape(groups['name']))
    content = util.lower(groups.get('nth_child' if child_form else 'nth_type'))

    if content == 'even':
        # 2n
        s1, s2, var = 2, 0, True
    elif content == 'odd':
        # 2n+1
        s1, s2, var = 2, 1, True
    else:
        parts = RE_NTH.match(content)
        a = parts.group('a')
        var = a.endswith('n')
        if a.startswith('n'):
            # Bare `n` means a coefficient of 1
            magnitude = '1'
        elif var:
            magnitude = a[:-1]
        else:
            magnitude = a
        s1 = int(('-' if parts.group('s1') == '-' else '') + magnitude, 10)
        b = parts.group('b')
        s2 = int(('-' if parts.group('s2') == '-' else '') + b, 10) if b else 0

    if child_form:
        if m.group('of'):
            # Parse the rest of `of S`.
            nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
        else:
            # Use default `*|*` for `of S`.
            nth_sel = CSS_NTH_OF_S_DEFAULT
        if pseudo_sel in (':nth-child', ':nth-last-child'):
            from_end = pseudo_sel == ':nth-last-child'
            sel.nth.append(ct.SelectorNth(s1, var, s2, False, from_end, nth_sel))
    elif pseudo_sel in (':nth-of-type', ':nth-last-of-type'):
        from_end = pseudo_sel == ':nth-last-of-type'
        sel.nth.append(ct.SelectorNth(s1, var, s2, True, from_end, ct.SelectorList()))
    return True
703
704
def parse_pseudo_open(self, sel, name, has_selector, iselector, index):
    """
    Parse pseudo with opening bracket.

    Parses the selector list inside the parentheses of `name` and appends it to
    `sel.selectors`. `:not` is parsed as a negated list and `:has` as a relative one.
    Always returns `True`.
    """

    extra = {':not': FLG_NOT, ':has': FLG_RELATIVE}.get(name, 0)
    nested = self.parse_selectors(iselector, index, FLG_PSEUDO | FLG_OPEN | extra)
    sel.selectors.append(nested)
    return True
716
717
def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index):
    """
    Parse combinator tokens inside a relative selector list (`:has()`).

    Here a combinator leads the selector it applies to, so `rel_type` carries the pending
    relation (prefixed with `:`) for the selector currently being built. The finished `sel` is
    attached to the relations of the last entry in `selectors`.

    Returns `(has_selector, sel, rel_type)`: `has_selector` reset to `False`, a fresh
    `_Selector`, and the relation to apply to it. Raises `SelectorSyntaxError` for a comma
    with no selector before it and for consecutive non-whitespace combinators.
    """

    combinator = m.group('relation').strip()
    if not combinator:
        combinator = WS_COMBINATOR
    if combinator == COMMA_COMBINATOR:
        if not has_selector:
            # If we've not captured any selector parts, the comma is either at the beginning of the pattern
            # or following another comma, both of which are unexpected. Commas must split selectors.
            raise SelectorSyntaxError(
                "The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
                self.pattern,
                index
            )
        sel.rel_type = rel_type
        selectors[-1].relations.append(sel)
        # A new list entry starts with the default (descendant) relation.
        rel_type = ":" + WS_COMBINATOR
        selectors.append(_Selector())
    else:
        if has_selector:
            # End the current selector and associate the leading combinator with this selector.
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        elif rel_type[1:] != WS_COMBINATOR:
            # It's impossible to have two whitespace combinators after each other as the patterns
            # will gobble up trailing whitespace. It is also impossible to have a whitespace
            # combinator after any other kind for the same reason. But we could have
            # multiple non-whitespace combinators. So if the current combinator is not a whitespace,
            # then we've hit the multiple combinator case, so we should fail.
            raise SelectorSyntaxError(
                'The multiple combinators at position {}'.format(index),
                self.pattern,
                index
            )
        # Set the leading combinator for the next selector.
        rel_type = ':' + combinator
    sel = _Selector()

    has_selector = False
    return has_selector, sel, rel_type
758
759
def parse_combinator(self, sel, m, has_selector, selectors, relations, is_pseudo, index):
    """
    Parse combinator tokens in a regular (non-relative) selector list.

    A comma finishes `sel` and appends it to `selectors`; any other combinator records `sel`
    in `relations` together with the relation type so it can be attached to the selector
    that follows. `relations` is modified in place.

    Returns `(has_selector, sel)`: `has_selector` reset to `False` and a fresh `_Selector`.
    Raises `SelectorSyntaxError` when no selector precedes the combinator.
    """

    combinator = m.group('relation').strip()
    if not combinator:
        combinator = WS_COMBINATOR
    if not has_selector:
        raise SelectorSyntaxError(
            "The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
            self.pattern,
            index
        )

    if combinator == COMMA_COMBINATOR:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        sel.relations.extend(relations)
        selectors.append(sel)
        del relations[:]
    else:
        sel.relations.extend(relations)
        sel.rel_type = combinator
        del relations[:]
        relations.append(sel)
    sel = _Selector()

    has_selector = False
    return has_selector, sel
788
789
def parse_class_id(self, sel, m, has_selector):
    """
    Parse HTML classes and ids.

    The matched token is either `.name` or `#name`; the unescaped name is appended to
    `sel.classes` or `sel.ids` accordingly. Always returns `True`.
    """

    token = m.group(0)
    target = sel.classes if token.startswith('.') else sel.ids
    target.append(css_unescape(token[1:]))
    return True
799
800
def parse_pseudo_contains(self, sel, m, has_selector):
    """
    Parse contains.

    Splits the comma separated `values` group of `m`, unescapes each value (quoted values are
    unquoted first) and appends them to `sel.contains` as one `ct.SelectorContains`.
    Always returns `True`.
    """

    def unquote(raw):
        """Unescape one value, stripping surrounding quotes when present."""

        if raw.startswith(("'", '"')):
            return css_unescape(raw[1:-1], True)
        return css_unescape(raw)

    texts = tuple(
        unquote(token.group('value'))
        for token in RE_VALUES.finditer(m.group('values'))
        if not token.group('split')
    )
    sel.contains.append(ct.SelectorContains(texts))
    return True
817
818
def parse_pseudo_lang(self, sel, m, has_selector):
    """
    Parse pseudo language.

    Splits the comma separated `values` group of `m`, unescapes each language range (quoted
    values are unquoted first) and appends them to `sel.lang` as one `ct.SelectorLang`.
    Always returns `True`.
    """

    def unquote(raw):
        """Unescape one value, stripping surrounding quotes when present."""

        if raw.startswith(('"', "'")):
            return css_unescape(raw[1:-1], True)
        return css_unescape(raw)

    languages = [
        unquote(token.group('value'))
        for token in RE_VALUES.finditer(m.group('values'))
        if not token.group('split')
    ]
    sel.lang.append(ct.SelectorLang(languages))
    return True
838
839
def parse_pseudo_dir(self, sel, m, has_selector):
    """
    Parse pseudo direction.

    Adds the left-to-right or right-to-left direction flag to `sel.flags`, depending on the
    `dir` group of `m`. Always returns `True`.
    """

    is_ltr = util.lower(m.group('dir')) == 'ltr'
    sel.flags |= ct.SEL_DIR_LTR if is_ltr else ct.SEL_DIR_RTL
    return True
846
847
def parse_selectors(self, iselector, index=0, flags=0):
    """
    Parse selectors.

    Consumes `(token_name, match)` pairs from `iselector` and builds a `ct.SelectorList`.
    `flags` is a combination of the `FLG_*` parse flags; with `FLG_OPEN` parsing stops at the
    matching `)`, so this method is also used (recursively, through the `parse_pseudo_*`
    helpers) for the argument lists of pseudo-classes. `index` is only used for error
    reporting until the first token has been consumed.

    Raises `SelectorSyntaxError` for malformed input and `NotImplementedError` for at-rules and
    pseudo-elements.
    """

    sel = _Selector()
    selectors = []
    has_selector = False
    closed = False
    relations = []
    rel_type = ":" + WS_COMBINATOR
    is_open = bool(flags & FLG_OPEN)
    is_pseudo = bool(flags & FLG_PSEUDO)
    is_relative = bool(flags & FLG_RELATIVE)
    is_not = bool(flags & FLG_NOT)
    is_html = bool(flags & FLG_HTML)
    is_default = bool(flags & FLG_DEFAULT)
    is_indeterminate = bool(flags & FLG_INDETERMINATE)
    is_in_range = bool(flags & FLG_IN_RANGE)
    is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
    is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)

    if self.debug:  # pragma: no cover
        active = (
            ('is_pseudo', is_pseudo),
            ('is_open', is_open),
            ('is_relative', is_relative),
            ('is_not', is_not),
            ('is_html', is_html),
            ('is_default', is_default),
            ('is_indeterminate', is_indeterminate),
            ('is_in_range', is_in_range),
            ('is_out_of_range', is_out_of_range),
            ('is_placeholder_shown', is_placeholder_shown)
        )
        for label, enabled in active:
            if enabled:
                print(' {}: True'.format(label))

    if is_relative:
        # Relative lists hang every parsed selector off the relations of a placeholder entry.
        selectors.append(_Selector())

    try:
        while True:
            key, m = next(iselector)

            # Handle parts
            if key == "at_rule":
                raise NotImplementedError("At-rules found at position {}".format(m.start(0)))
            elif key == 'pseudo_class_custom':
                has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
            elif key == 'pseudo_class':
                has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
            elif key == 'pseudo_element':
                raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
            elif key == 'pseudo_contains':
                has_selector = self.parse_pseudo_contains(sel, m, has_selector)
            elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
                has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
            elif key == 'pseudo_lang':
                has_selector = self.parse_pseudo_lang(sel, m, has_selector)
            elif key == 'pseudo_dir':
                has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                # Currently only supports HTML
                is_html = True
            elif key == 'pseudo_close':
                if not has_selector:
                    raise SelectorSyntaxError(
                        "Expected a selector at position {}".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
                if is_open:
                    closed = True
                    break
                else:
                    raise SelectorSyntaxError(
                        "Unmatched pseudo-class close at position {}".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
            elif key == 'combine':
                if is_relative:
                    has_selector, sel, rel_type = self.parse_has_combinator(
                        sel, m, has_selector, selectors, rel_type, index
                    )
                else:
                    has_selector, sel = self.parse_combinator(
                        sel, m, has_selector, selectors, relations, is_pseudo, index
                    )
            elif key == 'attribute':
                has_selector = self.parse_attribute_selector(sel, m, has_selector)
            elif key == 'tag':
                if has_selector:
                    raise SelectorSyntaxError(
                        "Tag name found at position {} instead of at the start".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
                has_selector = self.parse_tag_pattern(sel, m, has_selector)
            elif key in ('class', 'id'):
                has_selector = self.parse_class_id(sel, m, has_selector)

            # Track where the last consumed token ended, for error reporting.
            index = m.end(0)
    except StopIteration:
        # Token stream exhausted.
        pass

    if is_open and not closed:
        raise SelectorSyntaxError(
            "Unclosed pseudo-class at position {}".format(index),
            self.pattern,
            index
        )

    if has_selector:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        if is_relative:
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        else:
            sel.relations.extend(relations)
            del relations[:]
            selectors.append(sel)
    else:
        # We will always need to finish a selector when `:has()` is used as it leads with combining.
        raise SelectorSyntaxError(
            'Expected a selector at position {}'.format(index),
            self.pattern,
            index
        )

    # Some patterns require additional logic, such as default. We try to make these the
    # last pattern, and append the appropriate flag to that selector which communicates
    # to the matcher what additional logic is required.
    # NOTE(review): these are plain assignments, so they replace whatever flags the last
    # selector already carried rather than adding to them.
    if is_default:
        selectors[-1].flags = ct.SEL_DEFAULT
    if is_indeterminate:
        selectors[-1].flags = ct.SEL_INDETERMINATE
    if is_in_range:
        selectors[-1].flags = ct.SEL_IN_RANGE
    if is_out_of_range:
        selectors[-1].flags = ct.SEL_OUT_OF_RANGE
    if is_placeholder_shown:
        selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN

    return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
998
999
def selector_iter(self, pattern):
    """
    Tokenize `pattern`, yielding `(token_name, match)` pairs.

    Whitespace and comments at the very start and end of the pattern are skipped.
    At every position the entries of `self.css_tokens` are tried in order and the
    first one that matches is emitted. When nothing matches, a `SelectorSyntaxError`
    naming the problem at that position is raised.
    """

    # Error templates for characters that begin a known selector type
    malformed = {
        '[': "Malformed attribute selector at position {}",
        '.': "Malformed class selector at position {}",
        '#': "Malformed id selector at position {}",
        ':': "Malformed pseudo-class selector at position {}"
    }

    # Trim ignorable whitespace and comments from both ends of the pattern
    leading = RE_WS_BEGIN.search(pattern)
    index = leading.end(0) if leading else 0
    trailing = RE_WS_END.search(pattern)
    stop = trailing.start(0) if trailing else len(pattern)

    if self.debug:  # pragma: no cover
        print('## PARSING: {!r}'.format(pattern))

    while index < stop:
        m = None
        for token in self.css_tokens:
            m = token.match(pattern, index, self.flags)
            if not m:
                continue
            name = token.get_name()
            if self.debug:  # pragma: no cover
                print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0)))
            # Advance past the token before handing it to the consumer
            index = m.end(0)
            yield name, m
            break

        if m is not None:
            continue

        # No token matched here. Prefer an error that names the selector type the
        # character would have started; fall back to reporting the raw character.
        c = pattern[index]
        if c in malformed:
            msg = malformed[c].format(index)
        else:
            msg = "Invalid character {!r} position {}".format(c, index)
        raise SelectorSyntaxError(msg, self.pattern, index)

    if self.debug:  # pragma: no cover
        print('## END PARSING')
1039
1040
def process_selectors(self, index=0, flags=0):
    """Tokenize `self.pattern` and hand the token stream to `parse_selectors`."""

    tokens = self.selector_iter(self.pattern)
    return self.parse_selectors(tokens, index, flags)
1044
1045
1046
# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern)
1047
# A few patterns are order dependent as they use patterns previously compiled.
1048
1049
# Precompiled selector list backing `:link` and `:any-link`
CSS_LINK = CSSParser('html|*:is(a, area, link)[href]').process_selectors(flags=FLG_PSEUDO | FLG_HTML)
1053
# Precompiled selector list backing `:checked`:
# checkbox/radio inputs carrying `checked`, plus options carrying `selected`.
CSS_CHECKED = CSSParser(
    '''
    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
1059
# Precompiled selector list backing `:default`.
# The pattern refers to `:checked`, so CSS_CHECKED has to be compiled before this one.
CSS_DEFAULT = CSSParser(
    '''
    :checked,

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|form html|*:is(button, input)[type="submit"]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT)
1071
# Precompiled selector list backing `:indeterminate`.
# The final selector (named radio inputs that are not checked) is flagged via
# FLG_INDETERMINATE so that extra logic can be applied to it.
CSS_INDETERMINATE = CSSParser(
    '''
    html|input[type="checkbox"][indeterminate],
    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
    html|progress:not([value]),

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|input[type="radio"][name][name!='']:not([checked])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
1085
# CSS pattern for `:disabled`. The selector list covers, in order:
#   1. form-related elements that carry the `disabled` attribute themselves,
#   2. `option` children of an `optgroup[disabled]`,
#   3. form controls that are direct children of a `fieldset[disabled]`,
#   4. form controls nested deeper inside a `fieldset[disabled]`, reached through
#      a child that is not matched by `legend:nth-of-type(1)`.
# `fieldset` used to appear twice in the first `:is(...)` list; the duplicate entry
# could never change what matches, so it has been dropped.
CSS_DISABLED = CSSParser(
    '''
    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option)[disabled],
    html|optgroup[disabled] > html|option,
    html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
    html|fieldset[disabled] >
        html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
1095
# CSS pattern for `:enabled`: the form-related elements that are not `:disabled`.
# The pattern refers to `:disabled`, so presumably CSS_DISABLED must already be compiled.
# `fieldset` used to appear twice in the `:is(...)` list; the duplicate entry could
# never change what matches, so it has been dropped.
CSS_ENABLED = CSSParser(
    '''
    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option):not(:disabled)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
1101
# Precompiled selector list backing `:required`
CSS_REQUIRED = CSSParser('html|*:is(input, textarea, select)[required]').process_selectors(
    flags=FLG_PSEUDO | FLG_HTML
)
1105
# Precompiled selector list backing `:optional` (same elements as `:required`, minus the attribute)
CSS_OPTIONAL = CSSParser('html|*:is(input, textarea, select):not([required])').process_selectors(
    flags=FLG_PSEUDO | FLG_HTML
)
1109
# Precompiled selector list backing `:placeholder-shown`:
# text-like inputs with a non-empty `placeholder` and no (or an empty) `value`,
# plus textareas with a non-empty `placeholder`.
CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
    html|input:is(
        :not([type]),
        [type=""],
        [type=text],
        [type=search],
        [type=url],
        [type=tel],
        [type=email],
        [type=password],
        [type=number]
    )[placeholder][placeholder!='']:is(:not([value]), [value=""]),
    html|textarea[placeholder][placeholder!='']
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
1126
# Default selector list for the "of S" part of `:nth-child` (any element in any namespace)
CSS_NTH_OF_S_DEFAULT = CSSParser('*|*').process_selectors(flags=FLG_PSEUDO)
1130
# Precompiled selector list backing `:read-write`.
# The pattern refers to `:disabled`, so CSS_DISABLED must be compiled before this one.
CSS_READ_WRITE = CSSParser(
    '''
    html|*:is(
        textarea,
        input:is(
            :not([type]),
            [type=""],
            [type=text],
            [type=search],
            [type=url],
            [type=tel],
            [type=email],
            [type=number],
            [type=password],
            [type=date],
            [type=datetime-local],
            [type=month],
            [type=time],
            [type=week]
        )
    ):not([readonly], :disabled),
    html|*:is([contenteditable=""], [contenteditable="true" i])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
1155
# Precompiled selector list backing `:read-only`: every HTML element that is not `:read-write`
CSS_READ_ONLY = CSSParser(
    '''
    html|*:not(:read-write)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
1161
# Precompiled selector list backing `:in-range`:
# date/time/number/range inputs that declare a `min` or a `max`.
# FLG_IN_RANGE marks the selector for additional handling beyond the pattern.
CSS_IN_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML)
1178
# Precompiled selector list backing `:out-of-range`:
# date/time/number/range inputs that declare a `min` or a `max`.
# FLG_OUT_OF_RANGE marks the selector for additional handling beyond the pattern.
CSS_OUT_OF_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML)
1195
1196