GitHub Repository: aws/aws-cli
Path: blob/develop/awscli/customizations/cloudtrail/validation.py
# Copyright 2012-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import base64
import binascii
import json
import hashlib
import logging
import re
import sys
import zlib
from zlib import error as ZLibError
from datetime import timedelta
from dateutil import tz, parser

from pyasn1.error import PyAsn1Error
import rsa

from awscli.customizations.cloudtrail.utils import get_trail_by_arn, \
    get_account_id_from_arn
from awscli.customizations.commands import BasicCommand
from botocore.exceptions import ClientError
from awscli.compat import get_current_datetime
from awscli.schema import ParameterRequiredError
from awscli.utils import create_nested_client

LOG = logging.getLogger(__name__)
DATE_FORMAT = '%Y%m%dT%H%M%SZ'
DISPLAY_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


def format_date(date):
    """Returns a formatted date string in a CloudTrail date format"""
    return date.strftime(DATE_FORMAT)


def format_display_date(date):
    """Returns a formatted date string meant for CLI output"""
    return date.strftime(DISPLAY_DATE_FORMAT)


def normalize_date(date):
    """Returns a normalized date using a UTC timezone"""
    return date.replace(tzinfo=tz.tzutc())


def extract_digest_key_date(digest_s3_key):
    """Extract the timestamp portion of a manifest file.

    Manifest file names take the following form:
    AWSLogs/{account}/CloudTrail-Digest/{region}/{ymd}/{account}_CloudTrail \
    -Digest_{region}_{name}_{home_region}_{date}.json.gz
    """
    return digest_s3_key[-24:-8]
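
# Illustrative slice (hypothetical key): in
# '..._CloudTrail-Digest_us-east-1_foo_us-east-1_20150817T043557Z.json.gz'
# the trailing '.json.gz' is 8 characters and the timestamp is 16, so
# digest_s3_key[-24:-8] yields '20150817T043557Z'.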


def parse_date(date_string):
    try:
        return parser.parse(date_string)
    except ValueError:
        raise ValueError('Unable to parse date value: %s' % date_string)


def assert_cloudtrail_arn_is_valid(trail_arn):
    """Ensures that the arn looks correct.

    ARNs look like: arn:aws:cloudtrail:us-east-1:123456789012:trail/foo"""
    pattern = re.compile(r'arn:.+:cloudtrail:.+:\d{12}:trail/.+')
    if not pattern.match(trail_arn):
        raise ValueError('Invalid trail ARN provided: %s' % trail_arn)
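
# For example, the docstring's ARN passes:
#   assert_cloudtrail_arn_is_valid(
#       'arn:aws:cloudtrail:us-east-1:123456789012:trail/foo')
# while a value without a 12-digit account id, such as
# 'arn:aws:cloudtrail:trail/foo', raises ValueError.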


def create_digest_traverser(
    cloudtrail_client,
    organization_client,
    s3_client_provider,
    trail_arn,
    trail_source_region=None,
    on_invalid=None,
    on_gap=None,
    on_missing=None,
    bucket=None,
    prefix=None,
    account_id=None,
):
    """Creates a CloudTrail DigestTraverser and its object graph.

    :type cloudtrail_client: botocore.client.CloudTrail
    :param cloudtrail_client: Client used to connect to CloudTrail
    :type organization_client: botocore.client.organizations
    :param organization_client: Client used to connect to Organizations
    :type s3_client_provider: S3ClientProvider
    :param s3_client_provider: Used to create an Amazon S3 client per region.
    :param trail_arn: CloudTrail trail ARN
    :param trail_source_region: The scanned region of a trail.
    :param on_invalid: Callback that is invoked when validating a digest fails.
    :param on_gap: Callback that is invoked when a digest has no link to the
        previous digest, but there are more digests to validate. This can
        happen when a trail is disabled for a period of time.
    :param on_missing: Callback that is invoked when a digest file has been
        deleted from Amazon S3 but is supposed to be present.
    :param bucket: Amazon S3 bucket of the trail if it is different than the
        bucket that is currently associated with the trail.
    :param prefix: Key prefix prepended to each digest and log placed
        in the Amazon S3 bucket if it is different than the prefix that is
        currently associated with the trail.
    :param account_id: The account id for which the digest files are
        validated. For normal trails this is the caller account, for
        organization trails it is the member account.

    ``on_gap``, ``on_invalid``, and ``on_missing`` callbacks are invoked with
    the following named arguments:

    - ``bucket``: The next S3 bucket.
    - ``next_key``: (optional) Next digest key that was found in the bucket.
    - ``next_end_date``: (optional) End date of the next found digest.
    - ``last_key``: The last digest key that was found.
    - ``last_start_date``: (optional) Start date of last found digest.
    - ``message``: (optional) Message string about the notification.
    """
    assert_cloudtrail_arn_is_valid(trail_arn)
    organization_id = None
    if bucket is None:
        # Determine the bucket and prefix based on the trail arn.
        trail_info = get_trail_by_arn(cloudtrail_client, trail_arn)
        LOG.debug('Loaded trail info: %s', trail_info)
        bucket = trail_info['S3BucketName']
        prefix = trail_info.get('S3KeyPrefix', None)
        is_org_trail = trail_info.get('IsOrganizationTrail')
        if is_org_trail:
            if not account_id:
                raise ParameterRequiredError(
                    "Missing required parameter for organization "
                    "trail: '--account-id'")
            organization_id = organization_client.describe_organization()[
                'Organization']['Id']

    # Determine the region from the ARN (e.g., arn:aws:cloudtrail:REGION:...)
    trail_region = trail_arn.split(':')[3]
    # Determine the name from the ARN (the last part after "/")
    trail_name = trail_arn.split('/')[-1]
    # If the account id is not specified, parse it from the trail ARN.
    if not account_id:
        account_id = get_account_id_from_arn(trail_arn)

    digest_provider = DigestProvider(
        account_id=account_id, trail_name=trail_name,
        s3_client_provider=s3_client_provider,
        trail_source_region=trail_source_region,
        trail_home_region=trail_region,
        organization_id=organization_id)
    return DigestTraverser(
        digest_provider=digest_provider, starting_bucket=bucket,
        starting_prefix=prefix, on_invalid=on_invalid, on_gap=on_gap,
        on_missing=on_missing,
        public_key_provider=PublicKeyProvider(cloudtrail_client))
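
# Illustrative wiring (hypothetical session and trail; this mirrors what
# CloudTrailValidateLogs.setup_services/_call do below):
#   session = botocore.session.get_session()
#   traverser = create_digest_traverser(
#       cloudtrail_client=create_nested_client(session, 'cloudtrail'),
#       organization_client=create_nested_client(session, 'organizations'),
#       s3_client_provider=S3ClientProvider(session),
#       trail_arn='arn:aws:cloudtrail:us-east-1:123456789012:trail/foo')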


class S3ClientProvider(object):
    """Creates Amazon S3 clients and determines the region name of a client.

    This class will cache the location constraints of previously requested
    buckets and cache previously created clients for the same region.
    """
    def __init__(self, session, get_bucket_location_region='us-east-1'):
        self._session = session
        self._get_bucket_location_region = get_bucket_location_region
        self._client_cache = {}
        self._region_cache = {}

    def get_client(self, bucket_name):
        """Creates an S3 client that can work with the given bucket name"""
        region_name = self._get_bucket_region(bucket_name)
        return self._create_client(region_name)

    def _get_bucket_region(self, bucket_name):
        """Returns the region of a bucket"""
        if bucket_name not in self._region_cache:
            client = self._create_client(self._get_bucket_location_region)
            result = client.get_bucket_location(Bucket=bucket_name)
            region = result['LocationConstraint'] or 'us-east-1'
            self._region_cache[bucket_name] = region
        return self._region_cache[bucket_name]

    def _create_client(self, region_name):
        """Creates an Amazon S3 client for the given region name"""
        if region_name not in self._client_cache:
            client = create_nested_client(
                self._session, 's3', region_name=region_name)
            self._client_cache[region_name] = client
        return self._client_cache[region_name]
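
# Illustrative caching behavior (hypothetical session and buckets): two
# buckets that resolve to the same region share one cached client, and the
# bucket-to-region lookup itself is cached per bucket:
#   provider = S3ClientProvider(session)
#   a = provider.get_client('logs-bucket-in-us-west-2')
#   b = provider.get_client('other-bucket-in-us-west-2')
#   assert a is b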


class DigestError(ValueError):
    """Exception raised when a digest fails to validate"""
    pass


class DigestSignatureError(DigestError):
    """Exception raised when a digest signature is invalid"""
    def __init__(self, bucket, key):
        message = ('Digest file\ts3://%s/%s\tINVALID: signature verification '
                   'failed') % (bucket, key)
        super(DigestSignatureError, self).__init__(message)


class InvalidDigestFormat(DigestError):
    """Exception raised when a digest has an invalid format"""
    def __init__(self, bucket, key):
        message = 'Digest file\ts3://%s/%s\tINVALID: invalid format' % (
            bucket, key)
        super(InvalidDigestFormat, self).__init__(message)


class PublicKeyProvider(object):
    """Retrieves public keys from CloudTrail within a date range."""
    def __init__(self, cloudtrail_client):
        self._cloudtrail_client = cloudtrail_client

    def get_public_keys(self, start_date, end_date):
        """Loads public keys in a date range into a returned dict.

        :type start_date: datetime
        :param start_date: Start date of a date range.
        :type end_date: datetime
        :param end_date: End date of a date range.
        :rtype: dict
        :return: Returns a dict where each key is the fingerprint of the
            public key, and each value is a dict of public key data.
        """
        public_keys = self._cloudtrail_client.list_public_keys(
            StartTime=start_date, EndTime=end_date)
        public_keys_in_range = public_keys['PublicKeyList']
        LOG.debug('Loaded public keys in range: %s', public_keys_in_range)
        return dict((key['Fingerprint'], key) for key in public_keys_in_range)
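
# The returned mapping is keyed by fingerprint, roughly (hypothetical values):
#   {'a1b2c3d4': {'Fingerprint': 'a1b2c3d4', 'Value': '<base64 DER bytes>',
#                 'ValidityStartTime': ..., 'ValidityEndTime': ...}}
# Sha256RSADigestValidator below consumes the 'Value' entry.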


class DigestProvider(object):
    """
    Retrieves digest keys and digests from Amazon S3.

    This class is responsible for determining the full list of digest files
    in a bucket and loading digests from the bucket into a JSON decoded
    dict. This class is not responsible for validation or iterating from
    one digest to the next.
    """

    def __init__(
        self,
        s3_client_provider,
        account_id,
        trail_name,
        trail_home_region,
        trail_source_region=None,
        organization_id=None,
    ):
        self._client_provider = s3_client_provider
        self.trail_name = trail_name
        self.account_id = account_id
        self.trail_home_region = trail_home_region
        self.trail_source_region = trail_source_region or trail_home_region
        self.organization_id = organization_id

    def load_digest_keys_in_range(self, bucket, prefix, start_date, end_date):
        """Returns a list of digest keys in the date range.

        This method uses a list_objects API call and provides a Marker
        parameter that is calculated based on the start_date provided.
        Amazon S3 then returns all keys in the bucket that start after
        the given key (non-inclusive). We then iterate over the keys
        until the date extracted from the yielded keys is greater than
        the given end_date.
        """
        digests = []
        marker = self._create_digest_key(start_date, prefix)
        client = self._client_provider.get_client(bucket)
        paginator = client.get_paginator('list_objects')
        page_iterator = paginator.paginate(Bucket=bucket, Marker=marker)
        key_filter = page_iterator.search('Contents[*].Key')
        # Create a target start and end date.
        target_start_date = format_date(normalize_date(start_date))
        # Add one hour to the end_date to get logs that spilled over to next.
        target_end_date = format_date(
            normalize_date(end_date + timedelta(hours=1)))
        # Ensure digests are from the same trail.
        digest_key_regex = re.compile(self._create_digest_key_regex(prefix))
        for key in key_filter:
            if digest_key_regex.match(key):
                # Use a lexicographic comparison to know when to stop.
                extracted_date = extract_digest_key_date(key)
                if extracted_date > target_end_date:
                    break
                # Only append digests after the start date.
                if extracted_date >= target_start_date:
                    digests.append(key)
        return digests
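
    # The lexicographic comparisons above are sound because DATE_FORMAT
    # ('%Y%m%dT%H%M%SZ') sorts strings in the same order as the dates they
    # encode, e.g. '20150817T043557Z' < '20150818T000000Z'.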

    def fetch_digest(self, bucket, key):
        """Loads a digest by key from S3.

        Returns the JSON-decoded data and the GZIP-inflated raw content.
        """
        client = self._client_provider.get_client(bucket)
        result = client.get_object(Bucket=bucket, Key=key)
        try:
            digest = zlib.decompress(result['Body'].read(),
                                     zlib.MAX_WBITS | 16)
            digest_data = json.loads(digest.decode())
        except (ValueError, ZLibError):
            # Cannot gzip decode or JSON parse.
            raise InvalidDigestFormat(bucket, key)
        # Add the expected digest signature and algorithm to the dict.
        if 'signature' not in result['Metadata'] \
                or 'signature-algorithm' not in result['Metadata']:
            raise DigestSignatureError(bucket, key)
        digest_data['_signature'] = result['Metadata']['signature']
        digest_data['_signature_algorithm'] = \
            result['Metadata']['signature-algorithm']
        return digest_data, digest
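
    # zlib.MAX_WBITS | 16 tells zlib to expect a gzip wrapper. An equivalent
    # round trip with the standard library (hypothetical payload):
    #   raw = gzip.compress(json.dumps({'digestEndTime': '...'}).encode())
    #   json.loads(zlib.decompress(raw, zlib.MAX_WBITS | 16).decode())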

    def _create_digest_key(self, start_date, key_prefix):
        """Computes an Amazon S3 key based on the provided data.

        The computed key is what would have been placed in the S3 bucket if
        a log digest were created at a specific time. This computed key
        does not have to actually exist as it will only be used as
        a Marker parameter in a list_objects call.

        :return: Returns a computed key as a string.
        """
        # Subtract one minute to ensure the dates are inclusive.
        date = start_date - timedelta(minutes=1)
        template = 'AWSLogs/'
        template_params = {
            'account_id': self.account_id,
            'date': format_date(date),
            'ymd': date.strftime('%Y/%m/%d'),
            'source_region': self.trail_source_region,
            'home_region': self.trail_home_region,
            'name': self.trail_name
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += (
            '{account_id}/CloudTrail-Digest/{source_region}/'
            '{ymd}/{account_id}_CloudTrail-Digest_{source_region}_{name}_'
            '{home_region}_{date}.json.gz'
        )
        key = template.format(**template_params)
        if key_prefix:
            key = key_prefix + '/' + key
        return key
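
    # For example (hypothetical trail 'foo', account 123456789012, home and
    # source region us-east-1, start date 2015-08-17T04:36Z), the computed
    # marker would be the single key:
    #   AWSLogs/123456789012/CloudTrail-Digest/us-east-1/2015/08/17/
    #       123456789012_CloudTrail-Digest_us-east-1_foo_us-east-1_20150817T043500Z.json.gz
    # (wrapped here for width).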

    def _create_digest_key_regex(self, key_prefix):
        """Creates a regular expression used to match against S3 keys"""
        template = 'AWSLogs/'
        template_params = {
            'account_id': re.escape(self.account_id),
            'source_region': re.escape(self.trail_source_region),
            'home_region': re.escape(self.trail_home_region),
            'name': re.escape(self.trail_name)
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += (
            '{account_id}/CloudTrail\\-Digest/{source_region}/'
            '\\d+/\\d+/\\d+/{account_id}_CloudTrail\\-Digest_'
            '{source_region}_{name}_{home_region}_.+\\.json\\.gz'
        )
        key = template.format(**template_params)
        if key_prefix:
            key = re.escape(key_prefix) + '/' + key
        return '^' + key + '$'


class DigestTraverser(object):
    """Retrieves and validates digests within a date range."""
    # These keys are required to be present before validating the contents
    # of a digest.
    required_digest_keys = ['digestPublicKeyFingerprint', 'digestS3Bucket',
                            'digestS3Object', 'previousDigestSignature',
                            'digestEndTime', 'digestStartTime']

    def __init__(self, digest_provider, starting_bucket, starting_prefix,
                 public_key_provider, digest_validator=None,
                 on_invalid=None, on_gap=None, on_missing=None):
        """
        :type digest_provider: DigestProvider
        :param digest_provider: DigestProvider object
        :param starting_bucket: S3 bucket where the digests are stored.
        :param starting_prefix: An optional prefix applied to each S3 key.
        :param public_key_provider: Provides public keys for a range.
        :param digest_validator: Validates digests using a validate method.
        :param on_invalid: Callback invoked when a digest is invalid.
        :param on_gap: Callback invoked when a digest has no parent, but
            there are still more digests to validate.
        :param on_missing: Callback invoked when a digest file is missing.
        """
        self.starting_bucket = starting_bucket
        self.starting_prefix = starting_prefix
        self.digest_provider = digest_provider
        self._public_key_provider = public_key_provider
        self._on_gap = on_gap
        self._on_invalid = on_invalid
        self._on_missing = on_missing
        if digest_validator is None:
            digest_validator = Sha256RSADigestValidator()
        self._digest_validator = digest_validator

    def traverse(self, start_date, end_date=None):
        """Creates and returns a generator that yields validated digest data.

        Each yielded digest dictionary contains information about the digest
        and the log file associated with the digest. Digest files are
        validated before they are yielded. Whether or not the digest is
        successfully validated is stated in the "isValid" key value pair of
        the yielded dictionary.

        :type start_date: datetime
        :param start_date: Date to start validating from (inclusive).
        :type end_date: datetime
        :param end_date: Date to stop validating at (inclusive).
        """
        if end_date is None:
            end_date = get_current_datetime()
        end_date = normalize_date(end_date)
        start_date = normalize_date(start_date)
        bucket = self.starting_bucket
        prefix = self.starting_prefix
        digests = self._load_digests(bucket, prefix, start_date, end_date)
        public_keys = self._load_public_keys(start_date, end_date)
        key, end_date = self._get_last_digest(digests)
        last_start_date = end_date
        while key and start_date <= last_start_date:
            try:
                digest, end_date = self._load_and_validate_digest(
                    public_keys, bucket, key)
                last_start_date = normalize_date(
                    parse_date(digest['digestStartTime']))
                previous_bucket = digest.get('previousDigestS3Bucket', None)
                yield digest
                if previous_bucket is None:
                    # The chain is broken, so find next in digest store.
                    key, end_date = self._find_next_digest(
                        digests=digests, bucket=bucket, last_key=key,
                        last_start_date=last_start_date, cb=self._on_gap,
                        is_cb_conditional=True)
                else:
                    key = digest['previousDigestS3Object']
                    if previous_bucket != bucket:
                        bucket = previous_bucket
                        # The bucket changed so reload the digest list.
                        digests = self._load_digests(
                            bucket, prefix, start_date, end_date)
            except ClientError as e:
                if e.response['Error']['Code'] != 'NoSuchKey':
                    raise e
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_missing,
                    message=str(e))
            except DigestError as e:
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_invalid,
                    message=str(e))
            except Exception as e:
                # Any other unexpected errors.
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_invalid,
                    message='Digest file\ts3://%s/%s\tINVALID: %s'
                            % (bucket, key, str(e)))
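
    # Illustrative consumption (hypothetical dates; _call below drives the
    # generator the same way):
    #   for digest in traverser.traverse(parse_date('2015-01-08T05:21:42Z')):
    #       print(digest['digestS3Object'])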

    def _load_digests(self, bucket, prefix, start_date, end_date):
        return self.digest_provider.load_digest_keys_in_range(
            bucket=bucket, prefix=prefix,
            start_date=start_date, end_date=end_date)

    def _find_next_digest(self, digests, bucket, last_key, last_start_date,
                          cb=None, is_cb_conditional=False, message=None):
        """Finds the next digest in the bucket and invokes any callback."""
        next_key, next_end_date = self._get_last_digest(digests, last_key)
        if cb and (not is_cb_conditional or next_key):
            cb(bucket=bucket, next_key=next_key, last_key=last_key,
               next_end_date=next_end_date, last_start_date=last_start_date,
               message=message)
        return next_key, next_end_date

    def _get_last_digest(self, digests, before_key=None):
        """Finds the previous digest key (the last, or the one before
        before_key).

        If no key is provided, the last digest is used. If a digest is found,
        the end date of the provider is adjusted to match the found key's end
        date.
        """
        if not digests:
            return None, None
        elif before_key is None:
            next_key = digests.pop()
            next_key_date = normalize_date(
                parse_date(extract_digest_key_date(next_key)))
            return next_key, next_key_date
        # Find a key before the given key.
        before_key_date = parse_date(extract_digest_key_date(before_key))
        while digests:
            next_key = digests.pop()
            next_key_date = normalize_date(
                parse_date(extract_digest_key_date(next_key)))
            if next_key_date < before_key_date:
                LOG.debug("Next found key: %s", next_key)
                return next_key, next_key_date
        return None, None

    def _load_and_validate_digest(self, public_keys, bucket, key):
        """Loads and validates a digest from S3.

        :param public_keys: Public key dictionary of fingerprint to dict.
        :return: Returns a tuple of the digest data as a dict and end_date
        :rtype: tuple
        """
        digest_data, digest = self.digest_provider.fetch_digest(bucket, key)
        for required_key in self.required_digest_keys:
            if required_key not in digest_data:
                raise InvalidDigestFormat(bucket, key)
        # Ensure the bucket and key are the same as what's expected.
        if digest_data['digestS3Bucket'] != bucket \
                or digest_data['digestS3Object'] != key:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: has been moved from its '
                 'original location') % (bucket, key))
        # Get the public keys in the given time range.
        fingerprint = digest_data['digestPublicKeyFingerprint']
        if fingerprint not in public_keys:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: public key not found in '
                 'region %s for fingerprint %s') %
                (bucket, key, self.digest_provider.trail_home_region,
                 fingerprint))
        public_key_hex = public_keys[fingerprint]['Value']
        self._digest_validator.validate(
            bucket, key, public_key_hex, digest_data, digest)
        end_date = normalize_date(parse_date(digest_data['digestEndTime']))
        return digest_data, end_date

    def _load_public_keys(self, start_date, end_date):
        public_keys = self._public_key_provider.get_public_keys(
            start_date, end_date)
        if not public_keys:
            raise RuntimeError(
                'No public keys found between %s and %s' %
                (format_display_date(start_date),
                 format_display_date(end_date)))
        return public_keys


class Sha256RSADigestValidator(object):
    """
    Validates SHA256withRSA signed digests.

    The result of validating the digest is inserted into the digest_data
    dictionary using the isValid key value pair.
    """

    def validate(self, bucket, key, public_key, digest_data, inflated_digest):
        """Validates a digest file.

        Throws a DigestError when the digest is invalid.

        :param bucket: Bucket of the digest file
        :param key: Key of the digest file
        :param public_key: Public key bytes.
        :param digest_data: Dict of digest data returned when JSON
            decoding a manifest.
        :param inflated_digest: Inflated digest file contents as bytes.
        """
        try:
            decoded_key = base64.b64decode(public_key)
            public_key = rsa.PublicKey.load_pkcs1(decoded_key, format='DER')
            to_sign = self._create_string_to_sign(digest_data, inflated_digest)
            signature_bytes = binascii.unhexlify(digest_data['_signature'])
            rsa.verify(to_sign, signature_bytes, public_key)
        except PyAsn1Error:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: Unable to load PKCS #1 key'
                 ' with fingerprint %s')
                % (bucket, key, digest_data['digestPublicKeyFingerprint']))
        except rsa.pkcs1.VerificationError:
            # Note from the Python-RSA docs: Never display the stack trace of
            # a rsa.pkcs1.VerificationError exception. It shows where in the
            # code the exception occurred, and thus leaks information about
            # the key.
            raise DigestSignatureError(bucket, key)

    def _create_string_to_sign(self, digest_data, inflated_digest):
        previous_signature = digest_data['previousDigestSignature']
        if previous_signature is None:
            # The value must be 'null' to match the Java implementation.
            previous_signature = 'null'
        string_to_sign = "%s\n%s/%s\n%s\n%s" % (
            digest_data['digestEndTime'],
            digest_data['digestS3Bucket'],
            digest_data['digestS3Object'],
            hashlib.sha256(inflated_digest).hexdigest(),
            previous_signature)
        LOG.debug('Digest string to sign: %s', string_to_sign)
        return string_to_sign.encode()
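
    # The signed string thus has four newline-separated parts (hypothetical
    # values):
    #   2015-08-17T10:35:57Z                         <- digestEndTime
    #   my-bucket/AWSLogs/..._20150817T103557Z.json.gz
    #   <hex SHA-256 of the inflated digest file>
    #   <previous digest signature, or the literal string 'null'>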


class CloudTrailValidateLogs(BasicCommand):
    """
    Validates log digests and log files, optionally saving them to disk.
    """
    NAME = 'validate-logs'
    DESCRIPTION = """
    Validates CloudTrail logs for a given period of time.

    This command uses the digest files delivered to your S3 bucket to perform
    the validation.

    The AWS CLI allows you to detect the following types of changes:

    - Modification or deletion of CloudTrail log files.
    - Modification or deletion of CloudTrail digest files.

    To validate log files with the AWS CLI, the following preconditions must
    be met:

    - You must have online connectivity to AWS.
    - You must have read access to the S3 bucket that contains the digest and
      log files.
    - The digest and log files must not have been moved from the original S3
      location where CloudTrail delivered them.
    - For organization trails you must have access to describe-organization
      to validate digest files.

    When you disable Log File Validation, the chain of digest files is broken
    after one hour. CloudTrail will not digest log files that were delivered
    during a period in which the Log File Validation feature was disabled.
    For example, if you enable Log File Validation on January 1, disable it
    on January 2, and re-enable it on January 10, digest files will not be
    created for the log files delivered from January 3 to January 9. The same
    applies whenever you stop CloudTrail logging or delete a trail.

    .. note::

        Log files that have been downloaded to local disk cannot be validated
        with the AWS CLI. The CLI will download all log files each time this
        command is executed.

    .. note::

        This command requires that the role executing the command has
        permission to call ListObjects, GetObject, and GetBucketLocation for
        each bucket referenced by the trail.

    """

    ARG_TABLE = [
        {'name': 'trail-arn', 'required': True, 'cli_type_name': 'string',
         'help_text': 'Specifies the ARN of the trail to be validated'},
        {'name': 'start-time', 'required': True, 'cli_type_name': 'string',
         'help_text': ('Specifies that log files delivered on or after the '
                       'specified UTC timestamp value will be validated. '
                       'Example: "2015-01-08T05:21:42Z".')},
        {'name': 'end-time', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies that log files delivered on or '
                       'before the specified UTC timestamp value will be '
                       'validated. The default value is the current time. '
                       'Example: "2015-01-08T12:31:41Z".')},
        {'name': 's3-bucket', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the S3 bucket where the digest '
                       'files are stored. If a bucket name is not specified, '
                       'the CLI will retrieve it by calling '
                       'describe_trails.')},
        {'name': 's3-prefix', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the S3 prefix where the '
                       'digest files are stored. If not specified, the CLI '
                       'will determine the prefix automatically by calling '
                       'describe_trails.')},
        {'name': 'account-id', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the account for validating '
                       'logs. This parameter is needed for organization '
                       'trails for validating logs for a specific account '
                       'inside an organization.')},
        {'name': 'verbose', 'cli_type_name': 'boolean',
         'action': 'store_true',
         'help_text': 'Display verbose log validation information'}
    ]
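
    # Illustrative invocation (hypothetical ARN):
    #   aws cloudtrail validate-logs \
    #       --trail-arn arn:aws:cloudtrail:us-east-1:123456789012:trail/foo \
    #       --start-time 2015-01-08T05:21:42Z --verbose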

    def __init__(self, session):
        super(CloudTrailValidateLogs, self).__init__(session)
        self.trail_arn = None
        self.is_verbose = False
        self.start_time = None
        self.end_time = None
        self.s3_bucket = None
        self.s3_prefix = None
        self.s3_client_provider = None
        self.cloudtrail_client = None
        self.account_id = None
        self._source_region = None
        self._valid_digests = 0
        self._invalid_digests = 0
        self._valid_logs = 0
        self._invalid_logs = 0
        self._is_last_status_double_space = True
        self._found_start_time = None
        self._found_end_time = None

    def _run_main(self, args, parsed_globals):
        self.handle_args(args)
        self.setup_services(parsed_globals)
        self._call()
        if self._invalid_digests > 0 or self._invalid_logs > 0:
            return 1
        return 0

    def handle_args(self, args):
        self.trail_arn = args.trail_arn
        self.is_verbose = args.verbose
        self.s3_bucket = args.s3_bucket
        self.s3_prefix = args.s3_prefix
        self.account_id = args.account_id
        self.start_time = normalize_date(parse_date(args.start_time))
        if args.end_time:
            self.end_time = normalize_date(parse_date(args.end_time))
        else:
            self.end_time = normalize_date(get_current_datetime())
        if self.start_time > self.end_time:
            raise ValueError(('Invalid time range specified: start-time must '
                              'occur before end-time'))
        # Found start time always defaults to the given start time. This value
        # may change if the earliest found digest is after the given start
        # time. Note that the summary output report of what date ranges were
        # actually found is only shown if a valid digest is encountered,
        # thereby setting self._found_end_time to a value.
        self._found_start_time = self.start_time

    def setup_services(self, parsed_globals):
        self._source_region = parsed_globals.region
        # Use the same region as the CLI's region to get bucket locations.
        self.s3_client_provider = S3ClientProvider(
            self._session, self._source_region)
        client_args = {'region_name': parsed_globals.region,
                       'verify': parsed_globals.verify_ssl}
        self.organization_client = create_nested_client(
            self._session, 'organizations', **client_args)

        if parsed_globals.endpoint_url is not None:
            client_args['endpoint_url'] = parsed_globals.endpoint_url
        self.cloudtrail_client = create_nested_client(
            self._session, 'cloudtrail', **client_args)

    def _call(self):
        traverser = create_digest_traverser(
            trail_arn=self.trail_arn, cloudtrail_client=self.cloudtrail_client,
            organization_client=self.organization_client,
            trail_source_region=self._source_region,
            s3_client_provider=self.s3_client_provider, bucket=self.s3_bucket,
            prefix=self.s3_prefix, on_missing=self._on_missing_digest,
            on_invalid=self._on_invalid_digest, on_gap=self._on_digest_gap,
            account_id=self.account_id)
        self._write_startup_text()
        digests = traverser.traverse(self.start_time, self.end_time)
        for digest in digests:
            # Only valid digests are yielded and only valid digests can adjust
            # the found times that are reported in the CLI output summary.
            self._track_found_times(digest)
            self._valid_digests += 1
            self._write_status(
                'Digest file\ts3://%s/%s\tvalid'
                % (digest['digestS3Bucket'], digest['digestS3Object']))
            if not digest['logFiles']:
                continue
            for log in digest['logFiles']:
                self._download_log(log)
        self._write_summary_text()

    def _track_found_times(self, digest):
        # Track the earliest found start time, but do not use a date before
        # the user supplied start date.
        digest_start_time = parse_date(digest['digestStartTime'])
        if digest_start_time > self.start_time:
            self._found_start_time = digest_start_time
        # Only use the last found end time if it is less than the
        # user supplied end time (or the current date).
        if not self._found_end_time:
            digest_end_time = parse_date(digest['digestEndTime'])
            self._found_end_time = min(digest_end_time, self.end_time)

    def _download_log(self, log):
        """Download a log, decompress, and compare SHA256 checksums"""
        try:
            # Create a client that can work with this bucket.
            client = self.s3_client_provider.get_client(log['s3Bucket'])
            response = client.get_object(
                Bucket=log['s3Bucket'], Key=log['s3Object'])
            gzip_inflater = zlib.decompressobj(zlib.MAX_WBITS | 16)
            rolling_hash = hashlib.sha256()
            for chunk in iter(lambda: response['Body'].read(2048), b""):
                data = gzip_inflater.decompress(chunk)
                rolling_hash.update(data)
            remaining_data = gzip_inflater.flush()
            if remaining_data:
                rolling_hash.update(remaining_data)
            computed_hash = rolling_hash.hexdigest()
            if computed_hash != log['hashValue']:
                self._on_log_invalid(log)
            else:
                self._valid_logs += 1
                self._write_status(('Log file\ts3://%s/%s\tvalid'
                                    % (log['s3Bucket'], log['s3Object'])))
        except ClientError as e:
            if e.response['Error']['Code'] != 'NoSuchKey':
                raise
            self._on_missing_log(log)
        except Exception:
            self._on_invalid_log_format(log)
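
    # Streaming through zlib.decompressobj keeps memory bounded regardless of
    # log size; the same pattern works on any file-like source (hypothetical
    # local file):
    #   sha = hashlib.sha256()
    #   inflater = zlib.decompressobj(zlib.MAX_WBITS | 16)
    #   with open('log.json.gz', 'rb') as f:
    #       for chunk in iter(lambda: f.read(2048), b""):
    #           sha.update(inflater.decompress(chunk))
    #   sha.update(inflater.flush())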

    def _write_status(self, message, is_error=False):
        if is_error:
            if self._is_last_status_double_space:
                sys.stderr.write("%s\n\n" % message)
            else:
                sys.stderr.write("\n%s\n\n" % message)
            self._is_last_status_double_space = True
        elif self.is_verbose:
            self._is_last_status_double_space = False
            sys.stdout.write("%s\n" % message)

    def _write_startup_text(self):
        sys.stdout.write(
            'Validating log files for trail %s between %s and %s\n\n'
            % (self.trail_arn, format_display_date(self.start_time),
               format_display_date(self.end_time)))

    def _write_summary_text(self):
        if not self._is_last_status_double_space:
            sys.stdout.write('\n')
        sys.stdout.write('Results requested for %s to %s\n'
                         % (format_display_date(self.start_time),
                            format_display_date(self.end_time)))
        if not self._valid_digests and not self._invalid_digests:
            sys.stdout.write('No digests found\n')
            return
        if not self._found_start_time or not self._found_end_time:
            sys.stdout.write('No valid digests found in range\n')
        else:
            sys.stdout.write('Results found for %s to %s:\n'
                             % (format_display_date(self._found_start_time),
                                format_display_date(self._found_end_time)))
        self._write_ratio(self._valid_digests, self._invalid_digests, 'digest')
        self._write_ratio(self._valid_logs, self._invalid_logs, 'log')
        sys.stdout.write('\n')

    def _write_ratio(self, valid, invalid, name):
        total = valid + invalid
        if total > 0:
            sys.stdout.write('\n%d/%d %s files valid' % (valid, total, name))
            if invalid > 0:
                sys.stdout.write(', %d/%d %s files INVALID' % (invalid, total,
                                                               name))

    def _on_missing_digest(self, bucket, last_key, **kwargs):
        self._invalid_digests += 1
        self._write_status('Digest file\ts3://%s/%s\tINVALID: not found'
                           % (bucket, last_key), True)

    def _on_digest_gap(self, **kwargs):
        self._write_status(
            'No log files were delivered by CloudTrail between %s and %s'
            % (format_display_date(kwargs['next_end_date']),
               format_display_date(kwargs['last_start_date'])), True)

    def _on_invalid_digest(self, message, **kwargs):
        self._invalid_digests += 1
        self._write_status(message, True)

    def _on_invalid_log_format(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            ('Log file\ts3://%s/%s\tINVALID: invalid format'
             % (log_data['s3Bucket'], log_data['s3Object'])), True)

    def _on_log_invalid(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            "Log file\ts3://%s/%s\tINVALID: hash value doesn't match"
            % (log_data['s3Bucket'], log_data['s3Object']), True)

    def _on_missing_log(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            'Log file\ts3://%s/%s\tINVALID: not found'
            % (log_data['s3Bucket'], log_data['s3Object']), True)