Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
aws
GitHub Repository: aws/aws-cli
Path: blob/develop/awscli/shorthand.py
1566 views
1
# Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License"). You
4
# may not use this file except in compliance with the License. A copy of
5
# the License is located at
6
#
7
# http://aws.amazon.com/apache2.0/
8
#
9
# or in the "license" file accompanying this file. This file is
10
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11
# ANY KIND, either express or implied. See the License for the specific
12
# language governing permissions and limitations under the License.
13
"""Module for parsing shorthand syntax.
14
15
This module parses any CLI options that use a "shorthand"
16
syntax::
17
18
    --foo A=b,C=d
          |------|
              |
          Shorthand syntax
22
23
24
This module provides two main classes to do this.
25
First, there's a ``ShorthandParser`` class. This class works
26
on a purely syntactic level. It looks only at the string value
27
provided to it in order to figure out how the string should be parsed.
28
29
However, because there was a pre-existing shorthand parser, we need
30
to remain backwards compatible with the previous parser. One of the
31
things the previous parser did was use the associated JSON model to
32
control how the expression was parsed.
33
34
In order to accommodate this a post processing class is provided that
35
takes the parsed values from the ``ShorthandParser`` as well as the
36
corresponding JSON model for the CLI argument and makes any adjustments
37
necessary to maintain backwards compatibility. This is done in the
38
``BackCompatVisitor`` class.
39
40
"""
41
42
import re
43
import string
44
45
from awscli.paramfile import LOCAL_PREFIX_MAP, get_paramfile
46
from awscli.utils import is_document_type
47
48
# Unique sentinel returned by ``ShorthandParser._current()`` once the parse
# index has reached the end of the input string.
_EOF = object()
49
50
51
class _NamedRegex:
    """A compiled regular expression paired with a descriptive name.

    The name is carried alongside the pattern so that code reporting a
    failed match can say which kind of token was expected.
    """

    def __init__(self, name, regex_str):
        """Compile ``regex_str`` (with ``re.UNICODE``) and remember ``name``."""
        self.regex = re.compile(regex_str, flags=re.UNICODE)
        self.name = name

    def match(self, value):
        """Match the pattern at the start of ``value``.

        :return: The match object, or ``None`` when there is no match.
        """
        pattern = self.regex
        return pattern.match(value)
58
59
60
class ShorthandParseError(Exception):
    """Base class for errors raised while handling shorthand syntax.

    Subclasses that call ``_error_location`` are expected to set
    ``self.value`` (the full input string) and ``self.index`` (the offset
    in that string at which the problem was detected).
    """

    def _error_location(self):
        """Return the input annotated with a ``^`` under the error offset.

        The caret is placed on its own line directly beneath the line of
        input that contains ``self.index``; any later input lines are
        emitted after the caret line.
        """
        consumed, remaining, num_spaces = self.value, '', self.index
        before = self.value[: self.index]
        if '\n' in before:
            # If there's newlines in the consumed expression, we want
            # to make sure we're only counting the spaces
            # from the last newline:
            # foo=bar,\n
            # bar==baz
            #     ^
            last_newline = before.rindex('\n')
            num_spaces = self.index - last_newline - 1
        after = self.value[self.index :]
        if '\n' in after:
            # If there's newline in the remaining, divide value
            # into consumed and remaining
            # foo==bar,\n
            #     ^
            # bar=baz
            next_newline = self.index + after.index('\n')
            consumed = self.value[:next_newline]
            remaining = self.value[next_newline:]
        return '%s\n%s%s' % (consumed, (' ' * num_spaces) + '^', remaining)


class ShorthandParseSyntaxError(ShorthandParseError):
    """Raised when the input does not match the expected shorthand token."""

    def __init__(self, value, expected, actual, index):
        """Record the failure details and build the exception message.

        :param value: The complete input string being parsed.
        :param expected: Description of what the parser expected.
        :param actual: What the parser found instead.
        :param index: Offset into ``value`` where the mismatch occurred.
        """
        self.value = value
        self.expected = expected
        self.actual = actual
        self.index = index
        msg = self._construct_msg()
        super().__init__(msg)

    def _construct_msg(self):
        """Build the message, including the caret-annotated input."""
        # Both fragments must be f-strings; otherwise the placeholder for
        # the error location is emitted literally instead of interpolated.
        return (
            f"Expected: '{self.expected}', received: '{self.actual}' "
            f"for input:\n{self._error_location()}"
        )


class DuplicateKeyInObjectError(ShorthandParseError):
    """Raised when the same key appears twice in one shorthand object."""

    def __init__(self, key, value, index):
        """Record the failure details and build the exception message.

        :param key: The key that was seen a second time.
        :param value: The complete input string being parsed.
        :param index: Offset into ``value`` used to position the caret.
        """
        self.key = key
        self.value = value
        self.index = index
        msg = self._construct_msg()
        super().__init__(msg)

    def _construct_msg(self):
        """Build the message, including the caret-annotated input."""
        return (
            f"Second instance of key \"{self.key}\" encountered for input:\n"
            f"{self._error_location()}\nThis is often because there is a "
            "preceding \",\" instead of a space."
        )
114
115
116
class DocumentTypesNotSupportedError(Exception):
    """Signals that a shorthand value was supplied for a document type."""
118
119
120
class ShorthandParser:
    """Parses shorthand syntax in the CLI.

    Note that this parser does not rely on any JSON models to control
    how to parse the shorthand syntax.

    A key may be followed by ``@=`` instead of ``=``; in that case each
    scalar value parsed for that key is passed through
    ``_resolve_paramfiles`` before being returned.

    """

    _SINGLE_QUOTED = _NamedRegex('singled quoted', r'\'(?:\\\'|[^\'])*\'')
    _DOUBLE_QUOTED = _NamedRegex('double quoted', r'"(?:\\"|[^"])*"')
    _START_WORD = r'\!\#-&\(-\+\--\<\>-Z\\\\-z\u007c-\uffff'
    _FIRST_FOLLOW_CHARS = r'\s\!\#-&\(-\+\--\\\\\^-\|~-\uffff'
    _SECOND_FOLLOW_CHARS = r'\s\!\#-&\(-\+\--\<\>-\uffff'
    _ESCAPED_COMMA = '(\\\\,)'
    _FIRST_VALUE = _NamedRegex(
        'first',
        f'({_ESCAPED_COMMA}|[{_START_WORD}])'
        f'({_ESCAPED_COMMA}|[{_FIRST_FOLLOW_CHARS}])*',
    )
    _SECOND_VALUE = _NamedRegex(
        'second',
        f'({_ESCAPED_COMMA}|[{_START_WORD}])'
        f'({_ESCAPED_COMMA}|[{_SECOND_FOLLOW_CHARS}])*',
    )
    # Characters accepted in a key; built once instead of on every _key call.
    _VALID_KEY_CHARS = frozenset(
        string.ascii_letters + string.digits + '-_.#/:'
    )

    def __init__(self):
        # NOTE(review): ``_tokens`` is not read anywhere in this class; it is
        # kept so the instance attributes stay unchanged.
        self._tokens = []

    def parse(self, value):
        """Parse shorthand syntax.

        For example::

            parser = ShorthandParser()
            parser.parse('a=b')  # {'a': 'b'}
            parser.parse('a=b,c')  # {'a': ['b', 'c']}

        :type value: str
        :param value: Any value that needs to be parsed.

        :return: Parsed value, which will be a dictionary.
        :raises ShorthandParseSyntaxError: If the input is not valid
            shorthand syntax.
        :raises DuplicateKeyInObjectError: If a top-level key is repeated.
        """
        # All parse state is reset here, so one instance can be reused for
        # successive (non-concurrent) parse() calls.
        self._input_value = value
        self._index = 0
        self._should_resolve_paramfiles = False
        return self._parameter()

    def _parameter(self):
        """Parse the top-level comma separated list of key/value pairs."""
        # parameter = keyval *("," keyval)
        params = {}
        key, val = self._keyval()
        params[key] = val
        last_index = self._index
        while self._index < len(self._input_value):
            self._expect(',', consume_whitespace=True)
            key, val = self._keyval()
            # If a key is already defined, it is likely an incorrectly written
            # shorthand argument. Raise an error to inform the user.
            if key in params:
                raise DuplicateKeyInObjectError(
                    key, self._input_value, last_index + 1
                )
            params[key] = val
            last_index = self._index
        return params

    def _keyval(self):
        """Parse a single ``key=values`` or ``key@=values`` pair."""
        # keyval = key "=" [values] / key "@=" [file-optional-values]
        # file-optional-values = file://value / fileb://value / value
        key = self._key()
        self._should_resolve_paramfiles = self._consume_paramfile_marker()
        self._expect('=', consume_whitespace=True)
        values = self._values()
        return key, values

    def _consume_paramfile_marker(self):
        """Consume an optional ``@`` (and surrounding whitespace).

        :return: ``True`` if an ``@`` was present and consumed, else
            ``False``. Leading whitespace is consumed in both cases.
        """
        self._consume_whitespace()
        if self._current() != '@':
            return False
        self._index += 1
        self._consume_whitespace()
        return True

    def _key(self):
        """Consume and return the longest run of valid key characters.

        May return an empty string if the current character is not a valid
        key character.
        """
        # key = 1*(alpha / digit / "-" / "_" / "." / "#" / "/" / ":")
        #     ; [a-zA-Z0-9\-_.#/:]
        valid_chars = self._VALID_KEY_CHARS
        start = self._index
        while not self._at_eof():
            if self._current() not in valid_chars:
                break
            self._index += 1
        return self._input_value[start : self._index]

    def _values(self):
        """Parse whatever follows ``=`` for a top-level key."""
        # values = csv-list / explicit-list / hash-literal
        if self._at_eof():
            return ''
        elif self._current() == '[':
            return self._explicit_list()
        elif self._current() == '{':
            return self._hash_literal()
        else:
            return self._csv_value()

    def _csv_value(self):
        """Parse a scalar or an implicit comma separated list."""
        # Supports either:
        # foo=bar     -> 'bar'
        #     ^
        # foo=bar,baz -> ['bar', 'baz']
        #     ^
        first_value = self._first_value()
        self._consume_whitespace()
        # _current() returns the _EOF sentinel at end of input, which never
        # equals ',', so this also covers the end-of-input case.
        if self._current() != ',':
            return first_value
        self._expect(',', consume_whitespace=True)
        csv_list = [first_value]
        # Try to parse remaining list values.
        # It's possible we don't parse anything:
        # a=b,c=d
        #     ^-here
        # In the case above, we'll hit the ShorthandParseSyntaxError,
        # backtrack to the comma, and return a single scalar
        # value 'b'.
        while True:
            try:
                current = self._second_value()
                self._consume_whitespace()
                if self._at_eof():
                    csv_list.append(current)
                    break
                self._expect(',', consume_whitespace=True)
                csv_list.append(current)
            except ShorthandParseSyntaxError:
                # Backtrack to the previous comma.
                # This can happen when we reach this case:
                # foo=a,b,c=d,e=f
                #     ^-start
                # foo=a,b,c=d,e=f
                #          ^-error, "expected ',' received '='
                # foo=a,b,c=d,e=f
                #        ^-backtrack to here.
                if self._at_eof():
                    raise
                self._backtrack_to(',')
                break
        if len(csv_list) == 1:
            # Then this was a foo=bar case, so we expect
            # this to parse to a scalar value 'bar', i.e
            # {"foo": "bar"} instead of {"foo": ["bar"]}
            return first_value
        return csv_list

    def _value(self):
        """Parse an unquoted first value; return ``''`` if nothing matches."""
        result = self._FIRST_VALUE.match(self._input_value[self._index :])
        if result is not None:
            consumed = self._consume_matched_regex(result)
            processed = consumed.replace('\\,', ',').rstrip()
            return self._maybe_resolve_paramfiles(processed)
        return ''

    def _explicit_list(self):
        """Parse a bracketed list such as ``[a,b,c]``."""
        # explicit-list = "[" [value *("," value)] "]"
        self._expect('[', consume_whitespace=True)
        values = []
        while self._current() != ']':
            val = self._explicit_values()
            values.append(val)
            self._consume_whitespace()
            if self._current() != ']':
                self._expect(',')
                self._consume_whitespace()
        self._expect(']')
        return values

    def _explicit_values(self):
        """Parse one element inside an explicit list or hash literal."""
        # values = csv-list / explicit-list / hash-literal
        if self._current() == '[':
            return self._explicit_list()
        elif self._current() == '{':
            return self._hash_literal()
        else:
            return self._first_value()

    def _hash_literal(self):
        """Parse a braced mapping such as ``{a=b,c=d}``."""
        self._expect('{', consume_whitespace=True)
        keyvals = {}
        while self._current() != '}':
            key = self._key()
            self._should_resolve_paramfiles = self._consume_paramfile_marker()
            self._expect('=', consume_whitespace=True)
            v = self._explicit_values()
            self._consume_whitespace()
            if self._current() != '}':
                self._expect(',')
                self._consume_whitespace()
            keyvals[key] = v
        self._expect('}')
        return keyvals

    def _first_value(self):
        """Parse a quoted or unquoted value in first position."""
        # first-value = value / single-quoted-val / double-quoted-val
        if self._current() == "'":
            return self._single_quoted_value()
        elif self._current() == '"':
            return self._double_quoted_value()
        return self._value()

    def _single_quoted_value(self):
        """Parse a single-quoted value, unescaping ``\\'`` and ``\\\\``."""
        # single-quoted-value = %x27 *(val-escaped-single) %x27
        # val-escaped-single  = %x20-26 / %x28-7F / escaped-escape /
        #                       (escape single-quote)
        processed = self._consume_quoted(self._SINGLE_QUOTED, escaped_char="'")
        return self._maybe_resolve_paramfiles(processed)

    def _consume_quoted(self, regex, escaped_char=None):
        """Consume a quoted token and return its unescaped contents.

        The surrounding quote characters are stripped. If ``escaped_char``
        is given, its backslash-escaped form is replaced by the bare
        character; escaped backslashes are then collapsed.
        """
        value = self._must_consume_regex(regex)[1:-1]
        if escaped_char is not None:
            value = value.replace("\\%s" % escaped_char, escaped_char)
            value = value.replace("\\\\", "\\")
        return value

    def _double_quoted_value(self):
        """Parse a double-quoted value, unescaping ``\\"`` and ``\\\\``."""
        processed = self._consume_quoted(self._DOUBLE_QUOTED, escaped_char='"')
        return self._maybe_resolve_paramfiles(processed)

    def _second_value(self):
        """Parse a quoted or unquoted value after the first list element.

        :raises ShorthandParseSyntaxError: If no value can be matched.
        """
        if self._current() == "'":
            return self._single_quoted_value()
        elif self._current() == '"':
            return self._double_quoted_value()
        else:
            consumed = self._must_consume_regex(self._SECOND_VALUE)
            processed = consumed.replace('\\,', ',').rstrip()
            return self._maybe_resolve_paramfiles(processed)

    def _maybe_resolve_paramfiles(self, processed):
        """Resolve ``processed`` as a paramfile only if the key used ``@=``."""
        if self._should_resolve_paramfiles:
            return self._resolve_paramfiles(processed)
        return processed

    def _resolve_paramfiles(self, val):
        """Return what ``get_paramfile`` yields for ``val``, else ``val``.

        NOTE(review): presumably this loads ``file://`` / ``fileb://``
        contents via ``LOCAL_PREFIX_MAP``; confirm against awscli.paramfile.
        """
        if (paramfile := get_paramfile(val, LOCAL_PREFIX_MAP)) is not None:
            return paramfile
        return val

    def _expect(self, char, consume_whitespace=False):
        """Consume ``char`` at the current index or raise.

        :param char: The single character that must appear next.
        :param consume_whitespace: When true, skip whitespace both before
            and after the expected character.
        :raises ShorthandParseSyntaxError: If the input is exhausted or
            the next character is not ``char``.
        """
        if consume_whitespace:
            self._consume_whitespace()
        if self._index >= len(self._input_value):
            raise ShorthandParseSyntaxError(
                self._input_value, char, 'EOF', self._index
            )
        actual = self._input_value[self._index]
        if actual != char:
            raise ShorthandParseSyntaxError(
                self._input_value, char, actual, self._index
            )
        self._index += 1
        if consume_whitespace:
            self._consume_whitespace()

    def _must_consume_regex(self, regex):
        """Consume and return the text matched by ``regex`` or raise.

        :raises ShorthandParseSyntaxError: If ``regex`` does not match at
            the current index.
        """
        result = regex.match(self._input_value[self._index :])
        if result is not None:
            return self._consume_matched_regex(result)
        raise ShorthandParseSyntaxError(
            self._input_value, f'<{regex.name}>', '<none>', self._index
        )

    def _consume_matched_regex(self, result):
        """Advance past the span of ``result`` and return the matched text."""
        start, end = result.span()
        v = self._input_value[self._index + start : self._index + end]
        self._index += end - start
        return v

    def _current(self):
        """Return the character at the current index, or ``_EOF``."""
        # If the index is at the end of the input value,
        # then _EOF will be returned.
        if self._index < len(self._input_value):
            return self._input_value[self._index]
        return _EOF

    def _at_eof(self):
        """Return whether the whole input has been consumed."""
        return self._index >= len(self._input_value)

    def _backtrack_to(self, char):
        """Move the index backwards until it points at ``char``.

        Stops at index -1 if ``char`` is not found.
        """
        while self._index >= 0 and self._input_value[self._index] != char:
            self._index -= 1

    def _consume_whitespace(self):
        """Advance the index past any run of whitespace characters."""
        while self._current() != _EOF and self._current() in string.whitespace:
            self._index += 1
410
411
class ModelVisitor:
    """Walks parsed parameters alongside the shape model describing them.

    Each value is dispatched to ``_visit_<type_name>`` based on the
    ``type_name`` of its shape; shapes without a dedicated handler fall
    back to ``_visit_scalar``. The base implementation only traverses;
    subclasses override the handlers to act on values.
    """

    def visit(self, params, model):
        """Traverse ``params`` starting from the top-level shape ``model``."""
        # The root has no real container, so a throwaway dict and an empty
        # name stand in for the parent and member name.
        self._visit({}, model, '', params)

    def _visit(self, parent, shape, name, value):
        """Dispatch to the handler matching ``shape.type_name``."""
        handler_name = f'_visit_{shape.type_name}'
        handler = getattr(self, handler_name, self._visit_scalar)
        handler(parent, shape, name, value)

    def _visit_structure(self, parent, shape, name, value):
        """Visit every modeled member of a structure (dict values only)."""
        if isinstance(value, dict):
            for member_name, member_shape in shape.members.items():
                # Members absent from the value are still visited, as None.
                member_value = value.get(member_name)
                self._visit(value, member_shape, member_name, member_value)

    def _visit_list(self, parent, shape, name, value):
        """Visit each element of a list using the list's member shape."""
        if isinstance(value, list):
            for position, item in enumerate(value):
                self._visit(value, shape.member, position, item)

    def _visit_map(self, parent, shape, name, value):
        """Visit each value of a map (dict values only)."""
        if isinstance(value, dict):
            entry_shape = shape.value
            for entry_key, entry_value in value.items():
                self._visit(value, entry_shape, entry_key, entry_value)

    def _visit_scalar(self, parent, shape, name, value):
        """Handle a leaf value; a no-op in the base visitor."""
        return None
444
445
446
class BackCompatVisitor(ModelVisitor):
    """Adjusts parsed shorthand values in place to match the shape model.

    Scalars given where the model expects a list are wrapped in a list,
    and string values for numeric and boolean shapes are converted to the
    corresponding Python types. Document types are rejected.
    """

    def _visit_structure(self, parent, shape, name, value):
        """Visit structure members, rejecting document-typed shapes.

        :raises DocumentTypesNotSupportedError: If ``shape`` itself is a
            document type and ``value`` is not ``None``.
        :raises ShorthandParseError: If a member is a document type.
        """
        self._raise_if_document_type_found(value, shape)
        if not isinstance(value, dict):
            return
        for member_name, member_shape in shape.members.items():
            try:
                self._visit(
                    value, member_shape, member_name, value.get(member_name)
                )
            except DocumentTypesNotSupportedError:
                # Catch and propagate the document type error to a better
                # error message as when the original error is thrown there is
                # no reference to the original member that used the document
                # type. The original exception carries no message, so it is
                # suppressed from the chained traceback.
                raise ShorthandParseError(
                    'Shorthand syntax does not support document types. Use '
                    'JSON input for top-level argument to specify nested '
                    f'parameter: {member_name}'
                ) from None

    def _visit_list(self, parent, shape, name, value):
        """Wrap a non-list value in a list, or descend into a real list."""
        if not isinstance(value, list):
            # Convert a -> [a] because they specified
            # "foo=bar", but "bar" should really be ["bar"].
            if value is not None:
                parent[name] = [value]
        else:
            return super()._visit_list(parent, shape, name, value)

    def _visit_scalar(self, parent, shape, name, value):
        """Coerce a scalar value to the type named by ``shape.type_name``.

        Integer/long and double/float shapes are converted with ``int`` and
        ``float``; conversion errors from those calls propagate unchanged.
        Boolean shapes are only rewritten for the strings "true"/"false"
        (case-insensitive); anything else is left as-is.
        """
        if value is None:
            return
        type_name = shape.type_name
        if type_name in ['integer', 'long']:
            parent[name] = int(value)
        elif type_name in ['double', 'float']:
            parent[name] = float(value)
        elif type_name == 'boolean':
            # We want to make sure we only set a value
            # only if "true"/"false" is specified. Non-string values (for
            # example a list parsed from "flag=a,b") have no .lower(), so
            # they are left untouched rather than raising AttributeError.
            if not isinstance(value, str):
                return
            lowered = value.lower()
            if lowered == 'true':
                parent[name] = True
            elif lowered == 'false':
                parent[name] = False

    def _raise_if_document_type_found(self, value, member_shape):
        """Raise if a value was supplied for a document-typed shape.

        :raises DocumentTypesNotSupportedError: If ``value`` is not ``None``
            and ``is_document_type(member_shape)`` is true.
        """
        # Shorthand syntax does not have support for explicit typing and
        # instead relies on the model to do type coercion. However, document
        # types are unmodeled. So using short hand syntax on a document type
        # would result in all values being typed as strings (e.g. 1 -> "1",
        # null -> "null") which is probably not desired. So blocking the use
        # of document types allows us to add proper support for them in the
        # future in a backwards compatible way.
        if value is not None and is_document_type(member_shape):
            raise DocumentTypesNotSupportedError()
504
505