CoCalc -- shorthand.py

GitHub Repository: aws/aws-cli
Path: blob/develop/awscli/shorthand.py
¹⁵⁶⁶ views
1
# Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License"). You
4
# may not use this file except in compliance with the License. A copy of
5
# the License is located at
6
#
7
#     http://aws.amazon.com/apache2.0/
8
#
9
# or in the "license" file accompanying this file. This file is
10
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11
# ANY KIND, either express or implied. See the License for the specific
12
# language governing permissions and limitations under the License.
13
"""Module for parsing shorthand syntax.
14

15
This module parses any CLI options that use a "shorthand"
16
syntax::
17

18
    --foo A=b,C=d
19
         |------|
20
            |
21
            Shorthand syntax
22

23

24
This module provides two main classes to do this.
25
First, there's a ``ShorthandParser`` class.  This class works
26
on a purely syntactic level.  It looks only at the string value
27
provided to it in order to figure out how the string should be parsed.
28

29
However, because there was a pre-existing shorthand parser, we need
30
to remain backwards compatible with the previous parser.  One of the
31
things the previous parser did was use the associated JSON model to
32
control how the expression was parsed.
33

34
In order to accommodate this, a post-processing class is provided that
35
takes the parsed values from the ``ShorthandParser`` as well as the
36
corresponding JSON model for the CLI argument and makes any adjustments
37
necessary to maintain backwards compatibility.  This is done in the
38
``BackCompatVisitor`` class.
39

40
"""
41

42
import re
43
import string
44

45
from awscli.paramfile import LOCAL_PREFIX_MAP, get_paramfile
46
from awscli.utils import is_document_type
47

48
# Unique sentinel returned by ``ShorthandParser._current`` when the parse
# index has moved past the last character of the input.
_EOF = object()
49

50

51
class _NamedRegex:
    """Pair a compiled regular expression with a human-readable name.

    ``name`` is kept alongside the pattern so that code reporting a failed
    match can say which kind of token it was looking for.
    """

    def __init__(self, name, regex_str):
        # Compile once up front; the pattern is reused for every match.
        self.regex = re.compile(regex_str, flags=re.UNICODE)
        self.name = name

    def match(self, value):
        """Match the pattern against the start of ``value``.

        :return: The ``re.Match`` object, or ``None`` if there is no match.
        """
        matcher = self.regex.match
        return matcher(value)
58

59

60
class ShorthandParseError(Exception):
    """Base class for errors raised while processing shorthand syntax.

    ``_error_location`` reads ``self.value`` (the full input string) and
    ``self.index`` (the offset of the offending character).  This class
    does not assign either attribute itself, so both must be set on the
    instance before that helper is called.
    """

    def _error_location(self):
        """Render the input with a ``^`` marker beneath the error position.

        The caret sits on its own line directly below the input line that
        contains ``self.index``.  Any input that follows that line is
        emitted after the caret line.
        """
        before = self.value[: self.index]
        after = self.value[self.index :]

        # Caret column.  When the consumed text spans several lines only
        # the characters after the last newline count:
        # foo=bar,\n
        # bar==baz
        #     ^
        newline_before = before.rfind('\n')
        if newline_before == -1:
            caret_column = self.index
        else:
            caret_column = self.index - newline_before - 1

        # When more lines follow the error, split the input at the next
        # newline so the caret lands between the two parts:
        # foo==bar,\n
        #     ^
        # bar=baz
        newline_after = after.find('\n')
        if newline_after == -1:
            head, tail = self.value, ''
        else:
            split_at = self.index + newline_after
            head, tail = self.value[:split_at], self.value[split_at:]

        caret_line = ' ' * caret_column + '^'
        return f'{head}\n{caret_line}{tail}'
82

83

84
class ShorthandParseSyntaxError(ShorthandParseError):
    """Raised when the input does not contain what the parser expected.

    :param value: The complete shorthand string being parsed.
    :param expected: Description of what was expected at ``index``
        (a literal character or a ``<name>`` of a token pattern).
    :param actual: What was found instead (a character, ``'EOF'`` or
        ``'<none>'``).
    :param index: Offset into ``value`` where the mismatch occurred.
    """

    def __init__(self, value, expected, actual, index):
        # These attributes must be set before building the message:
        # ``_construct_msg`` and ``_error_location`` read them.
        self.value = value
        self.expected = expected
        self.actual = actual
        self.index = index
        msg = self._construct_msg()
        super().__init__(msg)

    def _construct_msg(self):
        """Build the message: expected/received plus a caret diagram."""
        # Every fragment that contains a placeholder has to carry the
        # ``f`` prefix.  Adjacent literals are concatenated, but a plain
        # literal is never interpolated, which previously left the text
        # "{self._error_location()}" verbatim in the message.
        return (
            f"Expected: '{self.expected}', received: '{self.actual}' "
            f"for input:\n{self._error_location()}"
        )
98

99

100
class DuplicateKeyInObjectError(ShorthandParseError):
    """Raised when the same key appears twice in one shorthand expression.

    :param key: The key that was seen for the second time.
    :param value: The complete shorthand string being parsed.
    :param index: Offset into ``value`` used to position the caret in the
        error message.
    """

    def __init__(self, key, value, index):
        # ``_construct_msg`` needs all three attributes in place.
        self.index = index
        self.value = value
        self.key = key
        super().__init__(self._construct_msg())

    def _construct_msg(self):
        """Build the message: the duplicate key, a caret diagram, a hint."""
        location = self._error_location()
        hint = (
            'This is often because there is a '
            'preceding "," instead of a space.'
        )
        return (
            f'Second instance of key "{self.key}" encountered for input:\n'
            f'{location}\n{hint}'
        )
114

115

116
class DocumentTypesNotSupportedError(Exception):
    """Signals that a value was supplied for a document-type shape."""
118

119

120
class ShorthandParser:
    """Recursive-descent parser for the CLI shorthand syntax.

    The parser is purely syntactic: it never consults a JSON model to
    decide how the input should be interpreted.  State is kept on the
    instance (``_input_value``, ``_index`` and
    ``_should_resolve_paramfiles``) and is reset by every ``parse`` call.

    """

    _SINGLE_QUOTED = _NamedRegex('singled quoted', r'\'(?:\\\'|[^\'])*\'')
    _DOUBLE_QUOTED = _NamedRegex('double quoted', r'"(?:\\"|[^"])*"')
    _START_WORD = r'\!\#-&\(-\+\--\<\>-Z\\\\-z\u007c-\uffff'
    _FIRST_FOLLOW_CHARS = r'\s\!\#-&\(-\+\--\\\\\^-\|~-\uffff'
    _SECOND_FOLLOW_CHARS = r'\s\!\#-&\(-\+\--\<\>-\uffff'
    _ESCAPED_COMMA = '(\\\\,)'
    _FIRST_VALUE = _NamedRegex(
        'first',
        f'({_ESCAPED_COMMA}|[{_START_WORD}])'
        f'({_ESCAPED_COMMA}|[{_FIRST_FOLLOW_CHARS}])*',
    )
    _SECOND_VALUE = _NamedRegex(
        'second',
        f'({_ESCAPED_COMMA}|[{_START_WORD}])'
        f'({_ESCAPED_COMMA}|[{_SECOND_FOLLOW_CHARS}])*',
    )

    def __init__(self):
        self._tokens = []

    def parse(self, value):
        """Parse shorthand syntax.

        For example::

            parser = ShorthandParser()
            parser.parse('a=b')  # {'a': 'b'}
            parser.parse('a=b,c')  # {'a': ['b', 'c']}

        :type value: str
        :param value: Any value that needs to be parsed.

        :return: Parsed value, which will be a dictionary.
        """
        # Start every parse from a clean slate.
        self._should_resolve_paramfiles = False
        self._index = 0
        self._input_value = value
        return self._parameter()

    def _parameter(self):
        # parameter = keyval *("," keyval)
        first_key, first_val = self._keyval()
        parsed = {first_key: first_val}
        # Index just past the previously parsed keyval; used to point at
        # the offending key if a duplicate shows up next.
        prev_end = self._index
        while not self._at_eof():
            self._expect(',', consume_whitespace=True)
            key, val = self._keyval()
            if key in parsed:
                # A repeated key is most likely a mistyped shorthand
                # argument, so tell the user instead of overwriting.
                raise DuplicateKeyInObjectError(
                    key, self._input_value, prev_end + 1
                )
            parsed[key] = val
            prev_end = self._index
        return parsed

    def _keyval(self):
        # keyval = key "=" [values] / key "@=" [file-optional-values]
        # file-optional-values = file://value / fileb://value / value
        key = self._key()
        self._should_resolve_paramfiles = self._consume_paramfile_marker()
        self._expect('=', consume_whitespace=True)
        return key, self._values()

    def _consume_paramfile_marker(self):
        # Skip whitespace, then consume an optional "@" (and the whitespace
        # after it).  Returns True only when the "@" was present.
        self._consume_whitespace()
        if self._current() != '@':
            return False
        self._index += 1
        self._consume_whitespace()
        return True

    def _key(self):
        # key = 1*(ASCII letter / digit / "-" / "_" / "." / "#" / "/" / ":")
        # An empty string is returned when no such character is present.
        valid_chars = string.ascii_letters + string.digits + '-_.#/:'
        text = self._input_value
        start = end = self._index
        while end < len(text) and text[end] in valid_chars:
            end += 1
        self._index = end
        return text[start:end]

    def _values(self):
        # values = csv-list / explicit-list / hash-literal
        if self._at_eof():
            return ''
        opener = self._current()
        if opener == '[':
            return self._explicit_list()
        if opener == '{':
            return self._hash_literal()
        return self._csv_value()

    def _csv_value(self):
        # Supports either:
        # foo=bar     -> 'bar'
        #     ^
        # foo=bar,baz -> ['bar', 'baz']
        #     ^
        head = self._first_value()
        self._consume_whitespace()
        if self._current() != ',':
            return head
        self._expect(',', consume_whitespace=True)
        items = [head]
        # Try to parse remaining list values.
        # It's possible we don't parse anything:
        # a=b,c=d
        #     ^-here
        # In the case above, we'll hit a ShorthandParseSyntaxError,
        # backtrack to the comma, and return a single scalar
        # value 'b'.
        while True:
            try:
                item = self._second_value()
                self._consume_whitespace()
                reached_end = self._at_eof()
                if not reached_end:
                    self._expect(',', consume_whitespace=True)
            except ShorthandParseSyntaxError:
                # Backtrack to the previous comma.
                # This can happen when we reach this case:
                # foo=a,b,c=d,e=f
                #     ^-start
                # foo=a,b,c=d,e=f
                #          ^-error, "expected ',' received '='
                # foo=a,b,c=d,e=f
                #        ^-backtrack to here.
                if self._at_eof():
                    raise
                self._backtrack_to(',')
                break
            items.append(item)
            if reached_end:
                break
        # With only one element this was a foo=bar case, which parses to
        # the scalar 'bar', i.e. {"foo": "bar"} rather than
        # {"foo": ["bar"]}.
        return items if len(items) > 1 else head

    def _value(self):
        # Unquoted value; an empty string is returned when nothing matches.
        match = self._FIRST_VALUE.match(self._input_value[self._index :])
        if match is None:
            return ''
        raw = self._consume_matched_regex(match)
        return self._maybe_resolve_paramfiles(raw.replace('\\,', ',').rstrip())

    def _maybe_resolve_paramfiles(self, text):
        # Apply paramfile resolution only while the current key used "@=".
        if self._should_resolve_paramfiles:
            return self._resolve_paramfiles(text)
        return text

    def _explicit_list(self):
        # explicit-list = "[" [value *("," value)] "]"
        self._expect('[', consume_whitespace=True)
        elements = []
        while self._current() != ']':
            elements.append(self._explicit_values())
            self._skip_separator_before(']')
        self._expect(']')
        return elements

    def _skip_separator_before(self, closer):
        # After an element: skip whitespace, and unless the closing
        # bracket comes next, require a "," and skip whitespace again.
        self._consume_whitespace()
        if self._current() != closer:
            self._expect(',')
            self._consume_whitespace()

    def _explicit_values(self):
        # values = explicit-list / hash-literal / first-value
        opener = self._current()
        if opener == '[':
            return self._explicit_list()
        if opener == '{':
            return self._hash_literal()
        return self._first_value()

    def _hash_literal(self):
        # hash-literal = "{" [key ["@"] "=" value *("," ...)] "}"
        self._expect('{', consume_whitespace=True)
        mapping = {}
        while self._current() != '}':
            key = self._key()
            self._should_resolve_paramfiles = self._consume_paramfile_marker()
            self._expect('=', consume_whitespace=True)
            mapping[key] = self._explicit_values()
            self._skip_separator_before('}')
        self._expect('}')
        return mapping

    def _first_value(self):
        # first-value = value / single-quoted-val / double-quoted-val
        quote = self._current()
        if quote == "'":
            return self._single_quoted_value()
        if quote == '"':
            return self._double_quoted_value()
        return self._value()

    def _single_quoted_value(self):
        # single-quoted-value = %x27 *(val-escaped-single) %x27
        # val-escaped-single  = %x20-26 / %x28-7F / escaped-escape /
        #                       (escape single-quote)
        text = self._consume_quoted(self._SINGLE_QUOTED, escaped_char="'")
        return self._maybe_resolve_paramfiles(text)

    def _consume_quoted(self, regex, escaped_char=None):
        # Consume a quoted token and strip the surrounding quote characters.
        inner = self._must_consume_regex(regex)[1:-1]
        if escaped_char is None:
            return inner
        # Unescape the quote character first, then doubled backslashes.
        unescaped = inner.replace('\\' + escaped_char, escaped_char)
        return unescaped.replace('\\\\', '\\')

    def _double_quoted_value(self):
        text = self._consume_quoted(self._DOUBLE_QUOTED, escaped_char='"')
        return self._maybe_resolve_paramfiles(text)

    def _second_value(self):
        # Like _first_value, but an unquoted value must match the
        # "second" pattern or a ShorthandParseSyntaxError is raised.
        quote = self._current()
        if quote == "'":
            return self._single_quoted_value()
        if quote == '"':
            return self._double_quoted_value()
        raw = self._must_consume_regex(self._SECOND_VALUE)
        return self._maybe_resolve_paramfiles(raw.replace('\\,', ',').rstrip())

    def _resolve_paramfiles(self, val):
        # Use whatever get_paramfile returns, unless it returns None, in
        # which case the value is passed through untouched.
        resolved = get_paramfile(val, LOCAL_PREFIX_MAP)
        return val if resolved is None else resolved

    def _expect(self, char, consume_whitespace=False):
        # Consume exactly ``char`` at the current position or raise.
        # With ``consume_whitespace`` set, whitespace on both sides of
        # the character is skipped as well.
        if consume_whitespace:
            self._consume_whitespace()
        at_end = self._at_eof()
        found = 'EOF' if at_end else self._input_value[self._index]
        if at_end or found != char:
            raise ShorthandParseSyntaxError(
                self._input_value, char, found, self._index
            )
        self._index += 1
        if consume_whitespace:
            self._consume_whitespace()

    def _must_consume_regex(self, regex):
        # Consume the text matched by ``regex`` at the current position;
        # raise a syntax error naming the pattern when it does not match.
        match = regex.match(self._input_value[self._index :])
        if match is None:
            raise ShorthandParseSyntaxError(
                self._input_value, f'<{regex.name}>', '<none>', self._index
            )
        return self._consume_matched_regex(match)

    def _consume_matched_regex(self, result):
        # ``result`` was matched against the input sliced at ``_index``,
        # so its span is relative to the current position.
        begin, finish = result.span()
        base = self._index
        self._index = base + (finish - begin)
        return self._input_value[base + begin : base + finish]

    def _current(self):
        # Character at the current index, or the _EOF sentinel once the
        # index has reached the end of the input value.
        if self._at_eof():
            return _EOF
        return self._input_value[self._index]

    def _at_eof(self):
        return len(self._input_value) <= self._index

    def _backtrack_to(self, char):
        # Walk the index backwards until it rests on ``char``.
        position = self._index
        while position >= 0 and self._input_value[position] != char:
            position -= 1
        self._index = position

    def _consume_whitespace(self):
        # Advance past any run of ``string.whitespace`` characters.
        text = self._input_value
        position = self._index
        while position < len(text) and text[position] in string.whitespace:
            position += 1
        self._index = position
409

410

411
class ModelVisitor:
    """Walk parsed parameters in step with their model shapes.

    Each value is dispatched to ``_visit_<type_name>`` based on the
    ``type_name`` of its shape; shapes with no dedicated method go to
    ``_visit_scalar``.  Every ``_visit_*`` method receives the container
    holding the value (``parent``), the shape, the key or index of the
    value inside that container (``name``) and the value itself.
    """

    def visit(self, params, model):
        # The top-level value has no real container, so a throwaway dict
        # and an empty name stand in for its parent slot.
        root_parent = {}
        self._visit(root_parent, model, '', params)

    def _visit(self, parent, shape, name, value):
        handler_name = f'_visit_{shape.type_name}'
        handler = getattr(self, handler_name, self._visit_scalar)
        handler(parent, shape, name, value)

    def _visit_structure(self, parent, shape, name, value):
        # Members are driven by the model, so members absent from the
        # value are still visited (with ``None``).
        if isinstance(value, dict):
            for member_name, member_shape in shape.members.items():
                member_value = value.get(member_name)
                self._visit(value, member_shape, member_name, member_value)

    def _visit_list(self, parent, shape, name, value):
        if isinstance(value, list):
            for position, item in enumerate(value):
                self._visit(value, shape.member, position, item)

    def _visit_map(self, parent, shape, name, value):
        if isinstance(value, dict):
            entry_shape = shape.value
            for entry_key, entry_value in value.items():
                self._visit(value, entry_shape, entry_key, entry_value)

    def _visit_scalar(self, parent, shape, name, value):
        # Nothing to do for scalars in the base visitor.
        return None
444

445

446
class BackCompatVisitor(ModelVisitor):
    """Adjust parsed shorthand values in place to match the model.

    Scalars are wrapped into one-element lists where the model expects a
    list, numeric and boolean strings are converted to the modelled
    type, and document-type shapes are rejected.
    """

    def _visit_structure(self, parent, shape, name, value):
        self._raise_if_document_type_found(value, shape)
        if not isinstance(value, dict):
            return
        for member_name, member_shape in shape.members.items():
            try:
                self._visit(
                    value, member_shape, member_name, value.get(member_name)
                )
            except DocumentTypesNotSupportedError:
                # The original error carries no reference to the member
                # that used the document type, so re-raise it here with a
                # message that names that member.
                message = (
                    'Shorthand syntax does not support document types. Use '
                    'JSON input for top-level argument to specify nested '
                    f'parameter: {member_name}'
                )
                raise ShorthandParseError(message)

    def _visit_list(self, parent, shape, name, value):
        if isinstance(value, list):
            return super()._visit_list(parent, shape, name, value)
        if value is not None:
            # Convert a -> [a] because they specified
            # "foo=bar", but "bar" should really be ["bar"].
            parent[name] = [value]

    def _visit_scalar(self, parent, shape, name, value):
        if value is None:
            return
        type_name = shape.type_name
        if type_name in ('integer', 'long'):
            parent[name] = int(value)
        elif type_name in ('double', 'float'):
            parent[name] = float(value)
        elif type_name == 'boolean':
            # Only replace the value when it is literally "true" or
            # "false" (in any letter case); anything else is left as is.
            flag = {'true': True, 'false': False}.get(value.lower())
            if flag is not None:
                parent[name] = flag

    def _raise_if_document_type_found(self, value, member_shape):
        # Shorthand syntax does not have support for explicit typing and
        # instead relies on the model to do type coercion. However, document
        # types are unmodeled. So using shorthand syntax on a document type
        # would result in all values being typed as strings (e.g. 1 -> "1",
        # null -> "null") which is probably not desired. So blocking the use
        # of document types allows us to add proper support for them in the
        # future in a backwards compatible way.
        if value is None:
            return
        if is_document_type(member_shape):
            raise DocumentTypesNotSupportedError()
504

505
Product

Resources

Company